user_namespace.c 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. #include <linux/export.h>
  3. #include <linux/nsproxy.h>
  4. #include <linux/slab.h>
  5. #include <linux/sched/signal.h>
  6. #include <linux/user_namespace.h>
  7. #include <linux/proc_ns.h>
  8. #include <linux/highuid.h>
  9. #include <linux/cred.h>
  10. #include <linux/securebits.h>
  11. #include <linux/security.h>
  12. #include <linux/keyctl.h>
  13. #include <linux/key-type.h>
  14. #include <keys/user-type.h>
  15. #include <linux/seq_file.h>
  16. #include <linux/fs.h>
  17. #include <linux/uaccess.h>
  18. #include <linux/ctype.h>
  19. #include <linux/projid.h>
  20. #include <linux/fs_struct.h>
  21. #include <linux/bsearch.h>
  22. #include <linux/sort.h>
  23. #include <linux/nstree.h>
/* Slab cache that create_user_ns() allocates struct user_namespace from. */
static struct kmem_cache *user_ns_cachep __ro_after_init;
/* Held by create_user_ns() while copying the parent's ->flags. */
static DEFINE_MUTEX(userns_state_mutex);
/* Forward declaration; the definition is not in the visible part of the file. */
static bool new_idmap_permitted(const struct file *file,
				struct user_namespace *ns, int cap_setid,
				struct uid_gid_map *map);
/* Forward declaration: create_user_ns() installs this as the ns->work handler. */
static void free_user_ns(struct work_struct *work);
  30. static struct ucounts *inc_user_namespaces(struct user_namespace *ns, kuid_t uid)
  31. {
  32. return inc_ucount(ns, uid, UCOUNT_USER_NAMESPACES);
  33. }
  34. static void dec_user_namespaces(struct ucounts *ucounts)
  35. {
  36. return dec_ucount(ucounts, UCOUNT_USER_NAMESPACES);
  37. }
  38. static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
  39. {
  40. /* Start with the same capabilities as init but useless for doing
  41. * anything as the capabilities are bound to the new user namespace.
  42. */
  43. cred->securebits = SECUREBITS_DEFAULT;
  44. cred->cap_inheritable = CAP_EMPTY_SET;
  45. cred->cap_permitted = CAP_FULL_SET;
  46. cred->cap_effective = CAP_FULL_SET;
  47. cred->cap_ambient = CAP_EMPTY_SET;
  48. cred->cap_bset = CAP_FULL_SET;
  49. #ifdef CONFIG_KEYS
  50. key_put(cred->request_key_auth);
  51. cred->request_key_auth = NULL;
  52. #endif
  53. /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
  54. cred->user_ns = user_ns;
  55. }
  56. static unsigned long enforced_nproc_rlimit(void)
  57. {
  58. unsigned long limit = RLIM_INFINITY;
  59. /* Is RLIMIT_NPROC currently enforced? */
  60. if (!uid_eq(current_uid(), GLOBAL_ROOT_UID) ||
  61. (current_user_ns() != &init_user_ns))
  62. limit = rlimit(RLIMIT_NPROC);
  63. return limit;
  64. }
/*
 * Create a new user namespace, deriving the creator from the user in the
 * passed credentials, and replacing that user with the new root user for the
 * new namespace.
 *
 * This is called by copy_creds(), which will finish setting the target task's
 * credentials.
 *
 * Returns 0 on success, after set_cred_user_ns() has pointed @new at the new
 * namespace.  On failure a negative errno is returned and everything acquired
 * so far is released through the fail_* labels:
 *   -ENOSPC  the parent's level is above 32, or inc_user_namespaces()
 *            returned NULL
 *   -EPERM   the caller is chrooted, or the creator's euid/egid has no
 *            mapping in the parent namespace
 *   -ENOMEM  allocating the namespace or setting up its sysctls failed
 *   other    whatever security_create_user_ns() or ns_common_init() returned
 */
int create_user_ns(struct cred *new)
{
	struct user_namespace *ns, *parent_ns = new->user_ns;
	kuid_t owner = new->euid;
	kgid_t group = new->egid;
	struct ucounts *ucounts;
	int ret, i;

	/* Cap the nesting depth: refuse once the parent is past level 32. */
	ret = -ENOSPC;
	if (parent_ns->level > 32)
		goto fail;

	/* Charge the creation to @owner in the parent; ret is still -ENOSPC. */
	ucounts = inc_user_namespaces(parent_ns, owner);
	if (!ucounts)
		goto fail;

	/*
	 * Verify that we can not violate the policy of which files
	 * may be accessed that is specified by the root directory,
	 * by verifying that the root directory is at the root of the
	 * mount namespace which allows all files to be accessed.
	 */
	ret = -EPERM;
	if (current_chrooted())
		goto fail_dec;

	/* The creator needs a mapping in the parent user namespace
	 * or else we won't be able to reasonably tell userspace who
	 * created a user_namespace.
	 */
	ret = -EPERM;
	if (!kuid_has_mapping(parent_ns, owner) ||
	    !kgid_has_mapping(parent_ns, group))
		goto fail_dec;

	/* Security hook; a negative return aborts the creation. */
	ret = security_create_user_ns(new);
	if (ret < 0)
		goto fail_dec;

	ret = -ENOMEM;
	ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
	if (!ns)
		goto fail_dec;

	/* Record whether the creator had CAP_SETFCAP; verify_root_map() reads this. */
	ns->parent_could_setfcap = cap_raised(new->cap_effective, CAP_SETFCAP);
	ret = ns_common_init(ns);
	if (ret)
		goto fail_free;

	/* Leave the new->user_ns reference with the new user namespace. */
	ns->parent = parent_ns;
	ns->level = parent_ns->level + 1;
	ns->owner = owner;
	ns->group = group;
	/* __put_user_ns() schedules this work item to run free_user_ns(). */
	INIT_WORK(&ns->work, free_user_ns);
	/* Every ucount limit starts at INT_MAX ... */
	for (i = 0; i < UCOUNT_COUNTS; i++) {
		ns->ucount_max[i] = INT_MAX;
	}
	/* ... and the rlimit-backed ones are then set from the creator's rlimits. */
	set_userns_rlimit_max(ns, UCOUNT_RLIMIT_NPROC, enforced_nproc_rlimit());
	set_userns_rlimit_max(ns, UCOUNT_RLIMIT_MSGQUEUE, rlimit(RLIMIT_MSGQUEUE));
	set_userns_rlimit_max(ns, UCOUNT_RLIMIT_SIGPENDING, rlimit(RLIMIT_SIGPENDING));
	set_userns_rlimit_max(ns, UCOUNT_RLIMIT_MEMLOCK, rlimit(RLIMIT_MEMLOCK));
	/* Keep the charge taken above; free_user_ns() drops it. */
	ns->ucounts = ucounts;

	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
	mutex_lock(&userns_state_mutex);
	ns->flags = parent_ns->flags;
	mutex_unlock(&userns_state_mutex);

#ifdef CONFIG_KEYS
	INIT_LIST_HEAD(&ns->keyring_name_list);
	init_rwsem(&ns->keyring_sem);
#endif
	ret = -ENOMEM;
	if (!setup_userns_sysctls(ns))
		goto fail_keyring;

	/* Point of no return: switch @new over and register the namespace. */
	set_cred_user_ns(new, ns);
	ns_tree_add(ns);
	return 0;

	/* Error unwinding: each label undoes one step and falls into the next. */
fail_keyring:
#ifdef CONFIG_PERSISTENT_KEYRINGS
	key_put(ns->persistent_keyring_register);
#endif
	ns_common_free(ns);
fail_free:
	kmem_cache_free(user_ns_cachep, ns);
fail_dec:
	dec_user_namespaces(ucounts);
fail:
	return ret;
}
  154. int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
  155. {
  156. struct cred *cred;
  157. int err = -ENOMEM;
  158. if (!(unshare_flags & CLONE_NEWUSER))
  159. return 0;
  160. cred = prepare_creds();
  161. if (cred) {
  162. err = create_user_ns(cred);
  163. if (err)
  164. put_cred(cred);
  165. else
  166. *new_cred = cred;
  167. }
  168. return err;
  169. }
/*
 * Work handler installed by INIT_WORK() in create_user_ns(): tears down the
 * user namespace that embeds @work.
 *
 * After releasing one namespace the loop drops a reference on its parent
 * with ns_ref_put(); while that returns true (presumably: the last reference
 * is gone) the parent is torn down in the next iteration, so a chain of
 * ancestors is released without recursion.
 */
static void free_user_ns(struct work_struct *work)
{
	struct user_namespace *parent, *ns =
		container_of(work, struct user_namespace, work);

	do {
		/* Read these first: @ns is handed to kfree_rcu() below. */
		struct ucounts *ucounts = ns->ucounts;
		parent = ns->parent;
		ns_tree_remove(ns);
		/*
		 * forward/reverse are only separately allocated arrays once a
		 * map has outgrown the base extents (see insert_extent()).
		 */
		if (ns->gid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
			kfree(ns->gid_map.forward);
			kfree(ns->gid_map.reverse);
		}
		if (ns->uid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
			kfree(ns->uid_map.forward);
			kfree(ns->uid_map.reverse);
		}
		if (ns->projid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
			kfree(ns->projid_map.forward);
			kfree(ns->projid_map.reverse);
		}
#if IS_ENABLED(CONFIG_BINFMT_MISC)
		kfree(ns->binfmt_misc);
#endif
		retire_userns_sysctls(ns);
		key_free_user_ns(ns);
		ns_common_free(ns);
		/* Concurrent nstree traversal depends on a grace period. */
		kfree_rcu(ns, ns.ns_rcu);
		/* Return the charge taken by inc_user_namespaces() at creation. */
		dec_user_namespaces(ucounts);
		ns = parent;
	} while (ns_ref_put(parent));
}
/*
 * Defer destruction of @ns: queue the work item that create_user_ns()
 * initialised with free_user_ns(), so the teardown runs from a workqueue
 * rather than in the caller's context.
 */
void __put_user_ns(struct user_namespace *ns)
{
	schedule_work(&ns->work);
}
EXPORT_SYMBOL(__put_user_ns);
/*
 * struct idmap_key - holds the information necessary to find an idmapping in a
 * sorted idmap array. It is passed to cmp_map_id() as first argument.
 */
struct idmap_key {
	bool map_up; /* true -> id from kid; false -> kid from id */
	u32 id; /* id to find */
	u32 count; /* length of the range starting at @id; all of it must fit in one extent */
};
  216. /*
  217. * cmp_map_id - Function to be passed to bsearch() to find the requested
  218. * idmapping. Expects struct idmap_key to be passed via @k.
  219. */
  220. static int cmp_map_id(const void *k, const void *e)
  221. {
  222. u32 first, last, id2;
  223. const struct idmap_key *key = k;
  224. const struct uid_gid_extent *el = e;
  225. id2 = key->id + key->count - 1;
  226. /* handle map_id_{down,up}() */
  227. if (key->map_up)
  228. first = el->lower_first;
  229. else
  230. first = el->first;
  231. last = first + el->count - 1;
  232. if (key->id >= first && key->id <= last &&
  233. (id2 >= first && id2 <= last))
  234. return 0;
  235. if (key->id < first || id2 < first)
  236. return -1;
  237. return 1;
  238. }
  239. /*
  240. * map_id_range_down_max - Find idmap via binary search in ordered idmap array.
  241. * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS.
  242. */
  243. static struct uid_gid_extent *
  244. map_id_range_down_max(unsigned extents, struct uid_gid_map *map, u32 id, u32 count)
  245. {
  246. struct idmap_key key;
  247. key.map_up = false;
  248. key.count = count;
  249. key.id = id;
  250. return bsearch(&key, map->forward, extents,
  251. sizeof(struct uid_gid_extent), cmp_map_id);
  252. }
  253. /*
  254. * map_id_range_down_base - Find idmap via binary search in static extent array.
  255. * Can only be called if number of mappings is equal or less than
  256. * UID_GID_MAP_MAX_BASE_EXTENTS.
  257. */
  258. static struct uid_gid_extent *
  259. map_id_range_down_base(unsigned extents, struct uid_gid_map *map, u32 id, u32 count)
  260. {
  261. unsigned idx;
  262. u32 first, last, id2;
  263. id2 = id + count - 1;
  264. /* Find the matching extent */
  265. for (idx = 0; idx < extents; idx++) {
  266. first = map->extent[idx].first;
  267. last = first + map->extent[idx].count - 1;
  268. if (id >= first && id <= last &&
  269. (id2 >= first && id2 <= last))
  270. return &map->extent[idx];
  271. }
  272. return NULL;
  273. }
  274. static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
  275. {
  276. struct uid_gid_extent *extent;
  277. unsigned extents = map->nr_extents;
  278. smp_rmb();
  279. if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
  280. extent = map_id_range_down_base(extents, map, id, count);
  281. else
  282. extent = map_id_range_down_max(extents, map, id, count);
  283. /* Map the id or note failure */
  284. if (extent)
  285. id = (id - extent->first) + extent->lower_first;
  286. else
  287. id = (u32) -1;
  288. return id;
  289. }
  290. u32 map_id_down(struct uid_gid_map *map, u32 id)
  291. {
  292. return map_id_range_down(map, id, 1);
  293. }
  294. /*
  295. * map_id_up_base - Find idmap via binary search in static extent array.
  296. * Can only be called if number of mappings is equal or less than
  297. * UID_GID_MAP_MAX_BASE_EXTENTS.
  298. */
  299. static struct uid_gid_extent *
  300. map_id_range_up_base(unsigned extents, struct uid_gid_map *map, u32 id, u32 count)
  301. {
  302. unsigned idx;
  303. u32 first, last, id2;
  304. id2 = id + count - 1;
  305. /* Find the matching extent */
  306. for (idx = 0; idx < extents; idx++) {
  307. first = map->extent[idx].lower_first;
  308. last = first + map->extent[idx].count - 1;
  309. if (id >= first && id <= last &&
  310. (id2 >= first && id2 <= last))
  311. return &map->extent[idx];
  312. }
  313. return NULL;
  314. }
  315. /*
  316. * map_id_up_max - Find idmap via binary search in ordered idmap array.
  317. * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS.
  318. */
  319. static struct uid_gid_extent *
  320. map_id_range_up_max(unsigned extents, struct uid_gid_map *map, u32 id, u32 count)
  321. {
  322. struct idmap_key key;
  323. key.map_up = true;
  324. key.count = count;
  325. key.id = id;
  326. return bsearch(&key, map->reverse, extents,
  327. sizeof(struct uid_gid_extent), cmp_map_id);
  328. }
  329. u32 map_id_range_up(struct uid_gid_map *map, u32 id, u32 count)
  330. {
  331. struct uid_gid_extent *extent;
  332. unsigned extents = map->nr_extents;
  333. smp_rmb();
  334. if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
  335. extent = map_id_range_up_base(extents, map, id, count);
  336. else
  337. extent = map_id_range_up_max(extents, map, id, count);
  338. /* Map the id or note failure */
  339. if (extent)
  340. id = (id - extent->lower_first) + extent->first;
  341. else
  342. id = (u32) -1;
  343. return id;
  344. }
  345. u32 map_id_up(struct uid_gid_map *map, u32 id)
  346. {
  347. return map_id_range_up(map, id, 1);
  348. }
  349. /**
  350. * make_kuid - Map a user-namespace uid pair into a kuid.
  351. * @ns: User namespace that the uid is in
  352. * @uid: User identifier
  353. *
  354. * Maps a user-namespace uid pair into a kernel internal kuid,
  355. * and returns that kuid.
  356. *
  357. * When there is no mapping defined for the user-namespace uid
  358. * pair INVALID_UID is returned. Callers are expected to test
  359. * for and handle INVALID_UID being returned. INVALID_UID
  360. * may be tested for using uid_valid().
  361. */
  362. kuid_t make_kuid(struct user_namespace *ns, uid_t uid)
  363. {
  364. /* Map the uid to a global kernel uid */
  365. return KUIDT_INIT(map_id_down(&ns->uid_map, uid));
  366. }
  367. EXPORT_SYMBOL(make_kuid);
  368. /**
  369. * from_kuid - Create a uid from a kuid user-namespace pair.
  370. * @targ: The user namespace we want a uid in.
  371. * @kuid: The kernel internal uid to start with.
  372. *
  373. * Map @kuid into the user-namespace specified by @targ and
  374. * return the resulting uid.
  375. *
  376. * There is always a mapping into the initial user_namespace.
  377. *
  378. * If @kuid has no mapping in @targ (uid_t)-1 is returned.
  379. */
  380. uid_t from_kuid(struct user_namespace *targ, kuid_t kuid)
  381. {
  382. /* Map the uid from a global kernel uid */
  383. return map_id_up(&targ->uid_map, __kuid_val(kuid));
  384. }
  385. EXPORT_SYMBOL(from_kuid);
  386. /**
  387. * from_kuid_munged - Create a uid from a kuid user-namespace pair.
  388. * @targ: The user namespace we want a uid in.
  389. * @kuid: The kernel internal uid to start with.
  390. *
  391. * Map @kuid into the user-namespace specified by @targ and
  392. * return the resulting uid.
  393. *
  394. * There is always a mapping into the initial user_namespace.
  395. *
  396. * Unlike from_kuid from_kuid_munged never fails and always
  397. * returns a valid uid. This makes from_kuid_munged appropriate
  398. * for use in syscalls like stat and getuid where failing the
  399. * system call and failing to provide a valid uid are not an
  400. * options.
  401. *
  402. * If @kuid has no mapping in @targ overflowuid is returned.
  403. */
  404. uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid)
  405. {
  406. uid_t uid;
  407. uid = from_kuid(targ, kuid);
  408. if (uid == (uid_t) -1)
  409. uid = overflowuid;
  410. return uid;
  411. }
  412. EXPORT_SYMBOL(from_kuid_munged);
  413. /**
  414. * make_kgid - Map a user-namespace gid pair into a kgid.
  415. * @ns: User namespace that the gid is in
  416. * @gid: group identifier
  417. *
  418. * Maps a user-namespace gid pair into a kernel internal kgid,
  419. * and returns that kgid.
  420. *
  421. * When there is no mapping defined for the user-namespace gid
  422. * pair INVALID_GID is returned. Callers are expected to test
  423. * for and handle INVALID_GID being returned. INVALID_GID may be
  424. * tested for using gid_valid().
  425. */
  426. kgid_t make_kgid(struct user_namespace *ns, gid_t gid)
  427. {
  428. /* Map the gid to a global kernel gid */
  429. return KGIDT_INIT(map_id_down(&ns->gid_map, gid));
  430. }
  431. EXPORT_SYMBOL(make_kgid);
  432. /**
  433. * from_kgid - Create a gid from a kgid user-namespace pair.
  434. * @targ: The user namespace we want a gid in.
  435. * @kgid: The kernel internal gid to start with.
  436. *
  437. * Map @kgid into the user-namespace specified by @targ and
  438. * return the resulting gid.
  439. *
  440. * There is always a mapping into the initial user_namespace.
  441. *
  442. * If @kgid has no mapping in @targ (gid_t)-1 is returned.
  443. */
  444. gid_t from_kgid(struct user_namespace *targ, kgid_t kgid)
  445. {
  446. /* Map the gid from a global kernel gid */
  447. return map_id_up(&targ->gid_map, __kgid_val(kgid));
  448. }
  449. EXPORT_SYMBOL(from_kgid);
  450. /**
  451. * from_kgid_munged - Create a gid from a kgid user-namespace pair.
  452. * @targ: The user namespace we want a gid in.
  453. * @kgid: The kernel internal gid to start with.
  454. *
  455. * Map @kgid into the user-namespace specified by @targ and
  456. * return the resulting gid.
  457. *
  458. * There is always a mapping into the initial user_namespace.
  459. *
  460. * Unlike from_kgid from_kgid_munged never fails and always
  461. * returns a valid gid. This makes from_kgid_munged appropriate
  462. * for use in syscalls like stat and getgid where failing the
  463. * system call and failing to provide a valid gid are not options.
  464. *
  465. * If @kgid has no mapping in @targ overflowgid is returned.
  466. */
  467. gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid)
  468. {
  469. gid_t gid;
  470. gid = from_kgid(targ, kgid);
  471. if (gid == (gid_t) -1)
  472. gid = overflowgid;
  473. return gid;
  474. }
  475. EXPORT_SYMBOL(from_kgid_munged);
  476. /**
  477. * make_kprojid - Map a user-namespace projid pair into a kprojid.
  478. * @ns: User namespace that the projid is in
  479. * @projid: Project identifier
  480. *
  481. * Maps a user-namespace uid pair into a kernel internal kuid,
  482. * and returns that kuid.
  483. *
  484. * When there is no mapping defined for the user-namespace projid
  485. * pair INVALID_PROJID is returned. Callers are expected to test
  486. * for and handle INVALID_PROJID being returned. INVALID_PROJID
  487. * may be tested for using projid_valid().
  488. */
  489. kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid)
  490. {
  491. /* Map the uid to a global kernel uid */
  492. return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid));
  493. }
  494. EXPORT_SYMBOL(make_kprojid);
  495. /**
  496. * from_kprojid - Create a projid from a kprojid user-namespace pair.
  497. * @targ: The user namespace we want a projid in.
  498. * @kprojid: The kernel internal project identifier to start with.
  499. *
  500. * Map @kprojid into the user-namespace specified by @targ and
  501. * return the resulting projid.
  502. *
  503. * There is always a mapping into the initial user_namespace.
  504. *
  505. * If @kprojid has no mapping in @targ (projid_t)-1 is returned.
  506. */
  507. projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid)
  508. {
  509. /* Map the uid from a global kernel uid */
  510. return map_id_up(&targ->projid_map, __kprojid_val(kprojid));
  511. }
  512. EXPORT_SYMBOL(from_kprojid);
  513. /**
  514. * from_kprojid_munged - Create a projiid from a kprojid user-namespace pair.
  515. * @targ: The user namespace we want a projid in.
  516. * @kprojid: The kernel internal projid to start with.
  517. *
  518. * Map @kprojid into the user-namespace specified by @targ and
  519. * return the resulting projid.
  520. *
  521. * There is always a mapping into the initial user_namespace.
  522. *
  523. * Unlike from_kprojid from_kprojid_munged never fails and always
  524. * returns a valid projid. This makes from_kprojid_munged
  525. * appropriate for use in syscalls like stat and where
  526. * failing the system call and failing to provide a valid projid are
  527. * not an options.
  528. *
  529. * If @kprojid has no mapping in @targ OVERFLOW_PROJID is returned.
  530. */
  531. projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid)
  532. {
  533. projid_t projid;
  534. projid = from_kprojid(targ, kprojid);
  535. if (projid == (projid_t) -1)
  536. projid = OVERFLOW_PROJID;
  537. return projid;
  538. }
  539. EXPORT_SYMBOL(from_kprojid_munged);
  540. static int uid_m_show(struct seq_file *seq, void *v)
  541. {
  542. struct user_namespace *ns = seq->private;
  543. struct uid_gid_extent *extent = v;
  544. struct user_namespace *lower_ns;
  545. uid_t lower;
  546. lower_ns = seq_user_ns(seq);
  547. if ((lower_ns == ns) && lower_ns->parent)
  548. lower_ns = lower_ns->parent;
  549. lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first));
  550. seq_printf(seq, "%10u %10u %10u\n",
  551. extent->first,
  552. lower,
  553. extent->count);
  554. return 0;
  555. }
  556. static int gid_m_show(struct seq_file *seq, void *v)
  557. {
  558. struct user_namespace *ns = seq->private;
  559. struct uid_gid_extent *extent = v;
  560. struct user_namespace *lower_ns;
  561. gid_t lower;
  562. lower_ns = seq_user_ns(seq);
  563. if ((lower_ns == ns) && lower_ns->parent)
  564. lower_ns = lower_ns->parent;
  565. lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first));
  566. seq_printf(seq, "%10u %10u %10u\n",
  567. extent->first,
  568. lower,
  569. extent->count);
  570. return 0;
  571. }
  572. static int projid_m_show(struct seq_file *seq, void *v)
  573. {
  574. struct user_namespace *ns = seq->private;
  575. struct uid_gid_extent *extent = v;
  576. struct user_namespace *lower_ns;
  577. projid_t lower;
  578. lower_ns = seq_user_ns(seq);
  579. if ((lower_ns == ns) && lower_ns->parent)
  580. lower_ns = lower_ns->parent;
  581. lower = from_kprojid(lower_ns, KPROJIDT_INIT(extent->lower_first));
  582. seq_printf(seq, "%10u %10u %10u\n",
  583. extent->first,
  584. lower,
  585. extent->count);
  586. return 0;
  587. }
  588. static void *m_start(struct seq_file *seq, loff_t *ppos,
  589. struct uid_gid_map *map)
  590. {
  591. loff_t pos = *ppos;
  592. unsigned extents = map->nr_extents;
  593. smp_rmb();
  594. if (pos >= extents)
  595. return NULL;
  596. if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
  597. return &map->extent[pos];
  598. return &map->forward[pos];
  599. }
  600. static void *uid_m_start(struct seq_file *seq, loff_t *ppos)
  601. {
  602. struct user_namespace *ns = seq->private;
  603. return m_start(seq, ppos, &ns->uid_map);
  604. }
  605. static void *gid_m_start(struct seq_file *seq, loff_t *ppos)
  606. {
  607. struct user_namespace *ns = seq->private;
  608. return m_start(seq, ppos, &ns->gid_map);
  609. }
  610. static void *projid_m_start(struct seq_file *seq, loff_t *ppos)
  611. {
  612. struct user_namespace *ns = seq->private;
  613. return m_start(seq, ppos, &ns->projid_map);
  614. }
  615. static void *m_next(struct seq_file *seq, void *v, loff_t *pos)
  616. {
  617. (*pos)++;
  618. return seq->op->start(seq, pos);
  619. }
  620. static void m_stop(struct seq_file *seq, void *v)
  621. {
  622. return;
  623. }
  624. const struct seq_operations proc_uid_seq_operations = {
  625. .start = uid_m_start,
  626. .stop = m_stop,
  627. .next = m_next,
  628. .show = uid_m_show,
  629. };
  630. const struct seq_operations proc_gid_seq_operations = {
  631. .start = gid_m_start,
  632. .stop = m_stop,
  633. .next = m_next,
  634. .show = gid_m_show,
  635. };
  636. const struct seq_operations proc_projid_seq_operations = {
  637. .start = projid_m_start,
  638. .stop = m_stop,
  639. .next = m_next,
  640. .show = projid_m_show,
  641. };
  642. static bool mappings_overlap(struct uid_gid_map *new_map,
  643. struct uid_gid_extent *extent)
  644. {
  645. u32 upper_first, lower_first, upper_last, lower_last;
  646. unsigned idx;
  647. upper_first = extent->first;
  648. lower_first = extent->lower_first;
  649. upper_last = upper_first + extent->count - 1;
  650. lower_last = lower_first + extent->count - 1;
  651. for (idx = 0; idx < new_map->nr_extents; idx++) {
  652. u32 prev_upper_first, prev_lower_first;
  653. u32 prev_upper_last, prev_lower_last;
  654. struct uid_gid_extent *prev;
  655. if (new_map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
  656. prev = &new_map->extent[idx];
  657. else
  658. prev = &new_map->forward[idx];
  659. prev_upper_first = prev->first;
  660. prev_lower_first = prev->lower_first;
  661. prev_upper_last = prev_upper_first + prev->count - 1;
  662. prev_lower_last = prev_lower_first + prev->count - 1;
  663. /* Does the upper range intersect a previous extent? */
  664. if ((prev_upper_first <= upper_last) &&
  665. (prev_upper_last >= upper_first))
  666. return true;
  667. /* Does the lower range intersect a previous extent? */
  668. if ((prev_lower_first <= lower_last) &&
  669. (prev_lower_last >= lower_first))
  670. return true;
  671. }
  672. return false;
  673. }
  674. /*
  675. * insert_extent - Safely insert a new idmap extent into struct uid_gid_map.
  676. * Takes care to allocate a 4K block of memory if the number of mappings exceeds
  677. * UID_GID_MAP_MAX_BASE_EXTENTS.
  678. */
  679. static int insert_extent(struct uid_gid_map *map, struct uid_gid_extent *extent)
  680. {
  681. struct uid_gid_extent *dest;
  682. if (map->nr_extents == UID_GID_MAP_MAX_BASE_EXTENTS) {
  683. struct uid_gid_extent *forward;
  684. /* Allocate memory for 340 mappings. */
  685. forward = kmalloc_objs(struct uid_gid_extent,
  686. UID_GID_MAP_MAX_EXTENTS);
  687. if (!forward)
  688. return -ENOMEM;
  689. /* Copy over memory. Only set up memory for the forward pointer.
  690. * Defer the memory setup for the reverse pointer.
  691. */
  692. memcpy(forward, map->extent,
  693. map->nr_extents * sizeof(map->extent[0]));
  694. map->forward = forward;
  695. map->reverse = NULL;
  696. }
  697. if (map->nr_extents < UID_GID_MAP_MAX_BASE_EXTENTS)
  698. dest = &map->extent[map->nr_extents];
  699. else
  700. dest = &map->forward[map->nr_extents];
  701. *dest = *extent;
  702. map->nr_extents++;
  703. return 0;
  704. }
  705. /* cmp function to sort() forward mappings */
  706. static int cmp_extents_forward(const void *a, const void *b)
  707. {
  708. const struct uid_gid_extent *e1 = a;
  709. const struct uid_gid_extent *e2 = b;
  710. if (e1->first < e2->first)
  711. return -1;
  712. if (e1->first > e2->first)
  713. return 1;
  714. return 0;
  715. }
  716. /* cmp function to sort() reverse mappings */
  717. static int cmp_extents_reverse(const void *a, const void *b)
  718. {
  719. const struct uid_gid_extent *e1 = a;
  720. const struct uid_gid_extent *e2 = b;
  721. if (e1->lower_first < e2->lower_first)
  722. return -1;
  723. if (e1->lower_first > e2->lower_first)
  724. return 1;
  725. return 0;
  726. }
  727. /*
  728. * sort_idmaps - Sorts an array of idmap entries.
  729. * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS.
  730. */
  731. static int sort_idmaps(struct uid_gid_map *map)
  732. {
  733. if (map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
  734. return 0;
  735. /* Sort forward array. */
  736. sort(map->forward, map->nr_extents, sizeof(struct uid_gid_extent),
  737. cmp_extents_forward, NULL);
  738. /* Only copy the memory from forward we actually need. */
  739. map->reverse = kmemdup_array(map->forward, map->nr_extents,
  740. sizeof(struct uid_gid_extent), GFP_KERNEL);
  741. if (!map->reverse)
  742. return -ENOMEM;
  743. /* Sort reverse array. */
  744. sort(map->reverse, map->nr_extents, sizeof(struct uid_gid_extent),
  745. cmp_extents_reverse, NULL);
  746. return 0;
  747. }
  748. /**
  749. * verify_root_map() - check the uid 0 mapping
  750. * @file: idmapping file
  751. * @map_ns: user namespace of the target process
  752. * @new_map: requested idmap
  753. *
  754. * If a process requests mapping parent uid 0 into the new ns, verify that the
  755. * process writing the map had the CAP_SETFCAP capability as the target process
  756. * will be able to write fscaps that are valid in ancestor user namespaces.
  757. *
  758. * Return: true if the mapping is allowed, false if not.
  759. */
  760. static bool verify_root_map(const struct file *file,
  761. struct user_namespace *map_ns,
  762. struct uid_gid_map *new_map)
  763. {
  764. int idx;
  765. const struct user_namespace *file_ns = file->f_cred->user_ns;
  766. struct uid_gid_extent *extent0 = NULL;
  767. for (idx = 0; idx < new_map->nr_extents; idx++) {
  768. if (new_map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
  769. extent0 = &new_map->extent[idx];
  770. else
  771. extent0 = &new_map->forward[idx];
  772. if (extent0->lower_first == 0)
  773. break;
  774. extent0 = NULL;
  775. }
  776. if (!extent0)
  777. return true;
  778. if (map_ns == file_ns) {
  779. /* The process unshared its ns and is writing to its own
  780. * /proc/self/uid_map. User already has full capabilites in
  781. * the new namespace. Verify that the parent had CAP_SETFCAP
  782. * when it unshared.
  783. * */
  784. if (!file_ns->parent_could_setfcap)
  785. return false;
  786. } else {
  787. /* Process p1 is writing to uid_map of p2, who is in a child
  788. * user namespace to p1's. Verify that the opener of the map
  789. * file has CAP_SETFCAP against the parent of the new map
  790. * namespace */
  791. if (!file_ns_capable(file, map_ns->parent, CAP_SETFCAP))
  792. return false;
  793. }
  794. return true;
  795. }
  796. static ssize_t map_write(struct file *file, const char __user *buf,
  797. size_t count, loff_t *ppos,
  798. int cap_setid,
  799. struct uid_gid_map *map,
  800. struct uid_gid_map *parent_map)
  801. {
  802. struct seq_file *seq = file->private_data;
  803. struct user_namespace *map_ns = seq->private;
  804. struct uid_gid_map new_map;
  805. unsigned idx;
  806. struct uid_gid_extent extent;
  807. char *kbuf, *pos, *next_line;
  808. ssize_t ret;
  809. /* Only allow < page size writes at the beginning of the file */
  810. if ((*ppos != 0) || (count >= PAGE_SIZE))
  811. return -EINVAL;
  812. /* Slurp in the user data */
  813. kbuf = memdup_user_nul(buf, count);
  814. if (IS_ERR(kbuf))
  815. return PTR_ERR(kbuf);
  816. /*
  817. * The userns_state_mutex serializes all writes to any given map.
  818. *
  819. * Any map is only ever written once.
  820. *
  821. * An id map fits within 1 cache line on most architectures.
  822. *
  823. * On read nothing needs to be done unless you are on an
  824. * architecture with a crazy cache coherency model like alpha.
  825. *
  826. * There is a one time data dependency between reading the
  827. * count of the extents and the values of the extents. The
  828. * desired behavior is to see the values of the extents that
  829. * were written before the count of the extents.
  830. *
  831. * To achieve this smp_wmb() is used on guarantee the write
  832. * order and smp_rmb() is guaranteed that we don't have crazy
  833. * architectures returning stale data.
  834. */
  835. mutex_lock(&userns_state_mutex);
  836. memset(&new_map, 0, sizeof(struct uid_gid_map));
  837. ret = -EPERM;
  838. /* Only allow one successful write to the map */
  839. if (map->nr_extents != 0)
  840. goto out;
  841. /*
  842. * Adjusting namespace settings requires capabilities on the target.
  843. */
  844. if (cap_valid(cap_setid) && !file_ns_capable(file, map_ns, CAP_SYS_ADMIN))
  845. goto out;
  846. /* Parse the user data */
  847. ret = -EINVAL;
  848. pos = kbuf;
  849. for (; pos; pos = next_line) {
  850. /* Find the end of line and ensure I don't look past it */
  851. next_line = strchr(pos, '\n');
  852. if (next_line) {
  853. *next_line = '\0';
  854. next_line++;
  855. if (*next_line == '\0')
  856. next_line = NULL;
  857. }
  858. pos = skip_spaces(pos);
  859. extent.first = simple_strtoul(pos, &pos, 10);
  860. if (!isspace(*pos))
  861. goto out;
  862. pos = skip_spaces(pos);
  863. extent.lower_first = simple_strtoul(pos, &pos, 10);
  864. if (!isspace(*pos))
  865. goto out;
  866. pos = skip_spaces(pos);
  867. extent.count = simple_strtoul(pos, &pos, 10);
  868. if (*pos && !isspace(*pos))
  869. goto out;
  870. /* Verify there is not trailing junk on the line */
  871. pos = skip_spaces(pos);
  872. if (*pos != '\0')
  873. goto out;
  874. /* Verify we have been given valid starting values */
  875. if ((extent.first == (u32) -1) ||
  876. (extent.lower_first == (u32) -1))
  877. goto out;
  878. /* Verify count is not zero and does not cause the
  879. * extent to wrap
  880. */
  881. if ((extent.first + extent.count) <= extent.first)
  882. goto out;
  883. if ((extent.lower_first + extent.count) <=
  884. extent.lower_first)
  885. goto out;
  886. /* Do the ranges in extent overlap any previous extents? */
  887. if (mappings_overlap(&new_map, &extent))
  888. goto out;
  889. if ((new_map.nr_extents + 1) == UID_GID_MAP_MAX_EXTENTS &&
  890. (next_line != NULL))
  891. goto out;
  892. ret = insert_extent(&new_map, &extent);
  893. if (ret < 0)
  894. goto out;
  895. ret = -EINVAL;
  896. }
  897. /* Be very certain the new map actually exists */
  898. if (new_map.nr_extents == 0)
  899. goto out;
  900. ret = -EPERM;
  901. /* Validate the user is allowed to use user id's mapped to. */
  902. if (!new_idmap_permitted(file, map_ns, cap_setid, &new_map))
  903. goto out;
  904. ret = -EPERM;
  905. /* Map the lower ids from the parent user namespace to the
  906. * kernel global id space.
  907. */
  908. for (idx = 0; idx < new_map.nr_extents; idx++) {
  909. struct uid_gid_extent *e;
  910. u32 lower_first;
  911. if (new_map.nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
  912. e = &new_map.extent[idx];
  913. else
  914. e = &new_map.forward[idx];
  915. lower_first = map_id_range_down(parent_map,
  916. e->lower_first,
  917. e->count);
  918. /* Fail if we can not map the specified extent to
  919. * the kernel global id space.
  920. */
  921. if (lower_first == (u32) -1)
  922. goto out;
  923. e->lower_first = lower_first;
  924. }
  925. /*
  926. * If we want to use binary search for lookup, this clones the extent
  927. * array and sorts both copies.
  928. */
  929. ret = sort_idmaps(&new_map);
  930. if (ret < 0)
  931. goto out;
  932. /* Install the map */
  933. if (new_map.nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) {
  934. memcpy(map->extent, new_map.extent,
  935. new_map.nr_extents * sizeof(new_map.extent[0]));
  936. } else {
  937. map->forward = new_map.forward;
  938. map->reverse = new_map.reverse;
  939. }
  940. smp_wmb();
  941. map->nr_extents = new_map.nr_extents;
  942. *ppos = count;
  943. ret = count;
  944. out:
  945. if (ret < 0 && new_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
  946. kfree(new_map.forward);
  947. kfree(new_map.reverse);
  948. map->forward = NULL;
  949. map->reverse = NULL;
  950. map->nr_extents = 0;
  951. }
  952. mutex_unlock(&userns_state_mutex);
  953. kfree(kbuf);
  954. return ret;
  955. }
  956. ssize_t proc_uid_map_write(struct file *file, const char __user *buf,
  957. size_t size, loff_t *ppos)
  958. {
  959. struct seq_file *seq = file->private_data;
  960. struct user_namespace *ns = seq->private;
  961. struct user_namespace *seq_ns = seq_user_ns(seq);
  962. if (!ns->parent)
  963. return -EPERM;
  964. if ((seq_ns != ns) && (seq_ns != ns->parent))
  965. return -EPERM;
  966. return map_write(file, buf, size, ppos, CAP_SETUID,
  967. &ns->uid_map, &ns->parent->uid_map);
  968. }
  969. ssize_t proc_gid_map_write(struct file *file, const char __user *buf,
  970. size_t size, loff_t *ppos)
  971. {
  972. struct seq_file *seq = file->private_data;
  973. struct user_namespace *ns = seq->private;
  974. struct user_namespace *seq_ns = seq_user_ns(seq);
  975. if (!ns->parent)
  976. return -EPERM;
  977. if ((seq_ns != ns) && (seq_ns != ns->parent))
  978. return -EPERM;
  979. return map_write(file, buf, size, ppos, CAP_SETGID,
  980. &ns->gid_map, &ns->parent->gid_map);
  981. }
  982. ssize_t proc_projid_map_write(struct file *file, const char __user *buf,
  983. size_t size, loff_t *ppos)
  984. {
  985. struct seq_file *seq = file->private_data;
  986. struct user_namespace *ns = seq->private;
  987. struct user_namespace *seq_ns = seq_user_ns(seq);
  988. if (!ns->parent)
  989. return -EPERM;
  990. if ((seq_ns != ns) && (seq_ns != ns->parent))
  991. return -EPERM;
  992. /* Anyone can set any valid project id no capability needed */
  993. return map_write(file, buf, size, ppos, -1,
  994. &ns->projid_map, &ns->parent->projid_map);
  995. }
  996. static bool new_idmap_permitted(const struct file *file,
  997. struct user_namespace *ns, int cap_setid,
  998. struct uid_gid_map *new_map)
  999. {
  1000. const struct cred *cred = file->f_cred;
  1001. if (cap_setid == CAP_SETUID && !verify_root_map(file, ns, new_map))
  1002. return false;
  1003. /* Don't allow mappings that would allow anything that wouldn't
  1004. * be allowed without the establishment of unprivileged mappings.
  1005. */
  1006. if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
  1007. uid_eq(ns->owner, cred->euid)) {
  1008. u32 id = new_map->extent[0].lower_first;
  1009. if (cap_setid == CAP_SETUID) {
  1010. kuid_t uid = make_kuid(ns->parent, id);
  1011. if (uid_eq(uid, cred->euid))
  1012. return true;
  1013. } else if (cap_setid == CAP_SETGID) {
  1014. kgid_t gid = make_kgid(ns->parent, id);
  1015. if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) &&
  1016. gid_eq(gid, cred->egid))
  1017. return true;
  1018. }
  1019. }
  1020. /* Allow anyone to set a mapping that doesn't require privilege */
  1021. if (!cap_valid(cap_setid))
  1022. return true;
  1023. /* Allow the specified ids if we have the appropriate capability
  1024. * (CAP_SETUID or CAP_SETGID) over the parent user namespace.
  1025. * And the opener of the id file also has the appropriate capability.
  1026. */
  1027. if (ns_capable(ns->parent, cap_setid) &&
  1028. file_ns_capable(file, ns->parent, cap_setid))
  1029. return true;
  1030. return false;
  1031. }
  1032. int proc_setgroups_show(struct seq_file *seq, void *v)
  1033. {
  1034. struct user_namespace *ns = seq->private;
  1035. unsigned long userns_flags = READ_ONCE(ns->flags);
  1036. seq_printf(seq, "%s\n",
  1037. (userns_flags & USERNS_SETGROUPS_ALLOWED) ?
  1038. "allow" : "deny");
  1039. return 0;
  1040. }
  1041. ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
  1042. size_t count, loff_t *ppos)
  1043. {
  1044. struct seq_file *seq = file->private_data;
  1045. struct user_namespace *ns = seq->private;
  1046. char kbuf[8], *pos;
  1047. bool setgroups_allowed;
  1048. ssize_t ret;
  1049. /* Only allow a very narrow range of strings to be written */
  1050. ret = -EINVAL;
  1051. if ((*ppos != 0) || (count >= sizeof(kbuf)))
  1052. goto out;
  1053. /* What was written? */
  1054. ret = -EFAULT;
  1055. if (copy_from_user(kbuf, buf, count))
  1056. goto out;
  1057. kbuf[count] = '\0';
  1058. pos = kbuf;
  1059. /* What is being requested? */
  1060. ret = -EINVAL;
  1061. if (strncmp(pos, "allow", 5) == 0) {
  1062. pos += 5;
  1063. setgroups_allowed = true;
  1064. }
  1065. else if (strncmp(pos, "deny", 4) == 0) {
  1066. pos += 4;
  1067. setgroups_allowed = false;
  1068. }
  1069. else
  1070. goto out;
  1071. /* Verify there is not trailing junk on the line */
  1072. pos = skip_spaces(pos);
  1073. if (*pos != '\0')
  1074. goto out;
  1075. ret = -EPERM;
  1076. mutex_lock(&userns_state_mutex);
  1077. if (setgroups_allowed) {
  1078. /* Enabling setgroups after setgroups has been disabled
  1079. * is not allowed.
  1080. */
  1081. if (!(ns->flags & USERNS_SETGROUPS_ALLOWED))
  1082. goto out_unlock;
  1083. } else {
  1084. /* Permanently disabling setgroups after setgroups has
  1085. * been enabled by writing the gid_map is not allowed.
  1086. */
  1087. if (ns->gid_map.nr_extents != 0)
  1088. goto out_unlock;
  1089. ns->flags &= ~USERNS_SETGROUPS_ALLOWED;
  1090. }
  1091. mutex_unlock(&userns_state_mutex);
  1092. /* Report a successful write */
  1093. *ppos = count;
  1094. ret = count;
  1095. out:
  1096. return ret;
  1097. out_unlock:
  1098. mutex_unlock(&userns_state_mutex);
  1099. goto out;
  1100. }
  1101. bool userns_may_setgroups(const struct user_namespace *ns)
  1102. {
  1103. bool allowed;
  1104. mutex_lock(&userns_state_mutex);
  1105. /* It is not safe to use setgroups until a gid mapping in
  1106. * the user namespace has been established.
  1107. */
  1108. allowed = ns->gid_map.nr_extents != 0;
  1109. /* Is setgroups allowed? */
  1110. allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED);
  1111. mutex_unlock(&userns_state_mutex);
  1112. return allowed;
  1113. }
  1114. /*
  1115. * Returns true if @child is the same namespace or a descendant of
  1116. * @ancestor.
  1117. */
  1118. bool in_userns(const struct user_namespace *ancestor,
  1119. const struct user_namespace *child)
  1120. {
  1121. const struct user_namespace *ns;
  1122. for (ns = child; ns->level > ancestor->level; ns = ns->parent)
  1123. ;
  1124. return (ns == ancestor);
  1125. }
  1126. bool current_in_userns(const struct user_namespace *target_ns)
  1127. {
  1128. return in_userns(target_ns, current_user_ns());
  1129. }
  1130. EXPORT_SYMBOL(current_in_userns);
  1131. static struct ns_common *userns_get(struct task_struct *task)
  1132. {
  1133. struct user_namespace *user_ns;
  1134. rcu_read_lock();
  1135. user_ns = get_user_ns(__task_cred(task)->user_ns);
  1136. rcu_read_unlock();
  1137. return user_ns ? &user_ns->ns : NULL;
  1138. }
  /* Drop a reference on the user namespace that embeds @ns. */
static void userns_put(struct ns_common *ns)
{
	struct user_namespace *user_ns = to_user_ns(ns);

	put_user_ns(user_ns);
}
  1143. static int userns_install(struct nsset *nsset, struct ns_common *ns)
  1144. {
  1145. struct user_namespace *user_ns = to_user_ns(ns);
  1146. struct cred *cred;
  1147. /* Don't allow gaining capabilities by reentering
  1148. * the same user namespace.
  1149. */
  1150. if (user_ns == current_user_ns())
  1151. return -EINVAL;
  1152. /* Tasks that share a thread group must share a user namespace */
  1153. if (!thread_group_empty(current))
  1154. return -EINVAL;
  1155. if (current->fs->users != 1)
  1156. return -EINVAL;
  1157. if (!ns_capable(user_ns, CAP_SYS_ADMIN))
  1158. return -EPERM;
  1159. cred = nsset_cred(nsset);
  1160. if (!cred)
  1161. return -EINVAL;
  1162. put_user_ns(cred->user_ns);
  1163. set_cred_user_ns(cred, get_user_ns(user_ns));
  1164. if (set_cred_ucounts(cred) < 0)
  1165. return -EINVAL;
  1166. return 0;
  1167. }
  1168. struct ns_common *ns_get_owner(struct ns_common *ns)
  1169. {
  1170. struct user_namespace *my_user_ns = current_user_ns();
  1171. struct user_namespace *owner, *p;
  1172. /* See if the owner is in the current user namespace */
  1173. owner = p = ns->ops->owner(ns);
  1174. for (;;) {
  1175. if (!p)
  1176. return ERR_PTR(-EPERM);
  1177. if (p == my_user_ns)
  1178. break;
  1179. p = p->parent;
  1180. }
  1181. return &get_user_ns(owner)->ns;
  1182. }
  1183. static struct user_namespace *userns_owner(struct ns_common *ns)
  1184. {
  1185. return to_user_ns(ns)->parent;
  1186. }
  1187. const struct proc_ns_operations userns_operations = {
  1188. .name = "user",
  1189. .get = userns_get,
  1190. .put = userns_put,
  1191. .install = userns_install,
  1192. .owner = userns_owner,
  1193. .get_parent = ns_get_owner,
  1194. };
  1195. static __init int user_namespaces_init(void)
  1196. {
  1197. user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC | SLAB_ACCOUNT);
  1198. ns_tree_add(&init_user_ns);
  1199. return 0;
  1200. }
  1201. subsys_initcall(user_namespaces_init);