From 674ece819199b16977d293043c58243fdcd48a6b Mon Sep 17 00:00:00 2001 From: Brooks Davis Date: Fri, 9 Aug 2024 01:28:56 +0100 Subject: [PATCH] mmap: explicitly expose capability permissions Introduce two new PROT_ values, PROT_CAP and PROT_NO_CAP. They combine to allow capability permissions to be implied in unmodified code using PROT_READ and PROT_WRITE while allowing capability permissions to be set or unset explicitly. If either of PROT_CAP or PROT_NO_CAP is set, then the value of the PROT_CAP flag bit defines the page protections and capability permissions for a given mapping. In the underlying implementation, PROT_CAP maps to VM_PROT_READ_CAP and VM_PROT_WRITE_CAP depending on the values of PROT_READ and PROT_WRITE. PROT_NO_CAP maps to a new VM_PROT_NO_IMPLY_CAP. VM_PROT_NO_IMPLY_CAP is used transiently in fo_mmap implementations to avoid accidentally adding capability permissions and is also added to vm_entry's max_protection to allow superset tests to succeed when reducing capability permissions on a mapping via mmap or mprotect.
--- bin/cheribsdtest/cheribsdtest_vm.c | 75 +++++++++++ lib/libsys/mmap.2 | 51 ++++++- sys/arm64/include/cherireg.h | 11 +- sys/arm64/vmm/vmm.c | 2 +- sys/cheri/cherireg.h | 2 + .../linuxkpi/common/include/linux/page.h | 4 +- sys/dev/drm/drmkpi/include/linux/page.h | 4 +- sys/fs/devfs/devfs_vnops.c | 2 + sys/riscv/include/cherireg.h | 12 +- sys/sys/mman.h | 5 +- sys/vm/vm.h | 19 +-- sys/vm/vm_map.c | 28 +++- sys/vm/vm_mmap.c | 124 ++++++++++++++---- 13 files changed, 280 insertions(+), 59 deletions(-) diff --git a/bin/cheribsdtest/cheribsdtest_vm.c b/bin/cheribsdtest/cheribsdtest_vm.c index e539256d1b87..d0ec78392d46 100644 --- a/bin/cheribsdtest/cheribsdtest_vm.c +++ b/bin/cheribsdtest/cheribsdtest_vm.c @@ -115,6 +115,81 @@ CHERIBSDTEST(vm_tag_mmap_anon, cheribsdtest_success(); } +CHERIBSDTEST(vm_tag_mmap_anon_cap, + "check tags are stored for MAP_ANON pages with explicit permissions") +{ + mmap_and_check_tag_stored(-1, PROT_READ | PROT_WRITE | PROT_CAP, + MAP_ANON); + cheribsdtest_success(); +} + +CHERIBSDTEST(vm_notag_mmap_no_cap, + "check tags are not stored it we request no capablity permissions", + .ct_flags = CT_FLAG_SIGNAL | CT_FLAG_SI_CODE | CT_FLAG_SI_TRAPNO | CT_FLAG_SI_ADDR, + .ct_signum = SIGSEGV, + .ct_si_code = SEGV_STORETAG, + .ct_si_trapno = TRAPNO_STORE_CAP_PF, + .ct_check_skip = skip_need_writable_tmp) +{ + void * __capability volatile *cp; + void * __capability cp_value; + int v; + + cp = CHERIBSDTEST_CHECK_SYSCALL(mmap(NULL, getpagesize(), + PROT_READ | PROT_WRITE | PROT_NO_CAP, MAP_ANON, -1, 0)); + cheribsdtest_set_expected_si_addr(NULL_DERIVED_VOIDP(cp)); + cp_value = cheri_ptr(&v, sizeof(v)); + *cp = cp_value; + cheribsdtest_failure_errx("tagged store succeeded"); +} + +CHERIBSDTEST(vm_notag_mprotect_no_cap, + "check tags are not stored if we remove capability page permissions", + .ct_flags = CT_FLAG_SIGNAL | CT_FLAG_SI_CODE | CT_FLAG_SI_TRAPNO | CT_FLAG_SI_ADDR, + .ct_signum = SIGSEGV, + .ct_si_code = SEGV_STORETAG, + .ct_si_trapno = 
TRAPNO_STORE_CAP_PF, + .ct_check_skip = skip_need_writable_tmp) +{ + void * __capability volatile *cp; + void * __capability cp_value; + int v; + + cp = CHERIBSDTEST_CHECK_SYSCALL(mmap(NULL, getpagesize(), + PROT_READ | PROT_WRITE, MAP_ANON, -1, 0)); + CHERIBSDTEST_CHECK_SYSCALL(mprotect(__DEVOLATILE(void *, cp), + getpagesize(), PROT_READ | PROT_WRITE | PROT_NO_CAP)); + cheribsdtest_set_expected_si_addr(NULL_DERIVED_VOIDP(cp)); + cp_value = cheri_ptr(&v, sizeof(v)); + *cp = cp_value; + cheribsdtest_failure_errx("tagged store succeeded"); +} + +static void +mmap_check_bad_protections(int prot, int expected_errno) +{ + CHERIBSDTEST_CHECK_CALL_ERROR((int)(intptr_t)mmap(NULL, getpagesize(), + prot, MAP_ANON, -1, 0), expected_errno); +} + +CHERIBSDTEST(vm_mmap_diallowed_prot, + "check that disallowed protection combinations are rejected") +{ + /* Max protections not a superset */ + mmap_check_bad_protections(PROT_READ | PROT_WRITE | PROT_MAX(PROT_READ), + ENOTSUP); + + /* Mixing implied and explicit protections */ + mmap_check_bad_protections(PROT_READ | PROT_CAP | PROT_MAX(PROT_READ), + ENOTSUP); + + /* Disallowed explicit capability protection combinations */ + mmap_check_bad_protections(PROT_CAP, ENOTSUP); + mmap_check_bad_protections(PROT_MAX(PROT_CAP), ENOTSUP); + + cheribsdtest_success(); +} + CHERIBSDTEST(vm_tag_shm_open_anon_shared, "check tags are stored for SHM_ANON MAP_SHARED pages") { diff --git a/lib/libsys/mmap.2 b/lib/libsys/mmap.2 index 6f5694a3809b..d9b4c4dc4d4a 100644 --- a/lib/libsys/mmap.2 +++ b/lib/libsys/mmap.2 @@ -104,7 +104,7 @@ argument by .Em or Ns 'ing the following values: .Pp -.Bl -tag -width PROT_WRITE -compact +.Bl -tag -width PROT_NO_CAP -compact .It Dv PROT_NONE Pages may not be accessed. .It Dv PROT_READ @@ -113,8 +113,43 @@ Pages may be read. Pages may be written. .It Dv PROT_EXEC Pages may be executed. +.It Dv PROT_CAP +CHERI capabilities may be read or written as dictated by +.Dv PROT_READ +and +.Dv PROT_WRITE .
+.It Dv PROT_NO_CAP +Respect the absence of +.Dv PROT_CAP . .El .Pp +On CHERI platforms, compatibility is retained with unmodified POSIX +programs by implying +.Dv PROT_CAP +if either of +.Dv PROT_READ +or +.Dv PROT_WRITE +is set unless the underlying backing store cannot safely support +capabilities (e.g., a +.Dv MAP_SHARED +mapping of a file). +If +.Dv PROT_NO_CAP +is set, then capability permissions will not be implied. +When +.Dv PROT_CAP +is passed, at least one of +.Dv PROT_READ +and +.Dv PROT_WRITE +is required. +On non-CHERI platforms the +.Dv PROT_CAP +and +.Dv PROT_NO_CAP +flags have no effect. +.Pp In addition to these protection flags, .Fx provides the ability to set the maximum protection of a region allocated by @@ -130,6 +165,14 @@ values wrapped in the macro into the .Fa prot argument. +The +.Dv PROT_MAX() +flags must be a superset of the unwrapped flags. +If one set of flags contains +.Dv PROT_CAP +or +.Dv PROT_NO_CAP +then both must. .Pp The .Fa flags @@ -614,6 +657,12 @@ The .Fa prot argument contains protections which are not a subset of the specified maximum protections. +.It Bq Er ENOTSUP +.Dv PROT_CAP +without +.Dv PROT_READ +or +.Dv PROT_WRITE .
.El .Sh SEE ALSO .Xr madvise 2 , diff --git a/sys/arm64/include/cherireg.h b/sys/arm64/include/cherireg.h index 307c72dbc960..d47db9f7ee2d 100644 --- a/sys/arm64/include/cherireg.h +++ b/sys/arm64/include/cherireg.h @@ -102,13 +102,16 @@ * vm_prot_t to capability permission bits */ #define CHERI_PERMS_PROT2PERM_READ \ - (CHERI_PERM_LOAD | CHERI_PERM_LOAD_CAP | CHERI_PERM_MUTABLE_LOAD) + CHERI_PERM_LOAD +#define CHERI_PERMS_PROT2PERM_READ_CAP \ + (CHERI_PERM_LOAD_CAP | CHERI_PERM_MUTABLE_LOAD) #define CHERI_PERMS_PROT2PERM_WRITE \ - (CHERI_PERM_STORE | CHERI_PERM_STORE_CAP | \ - CHERI_PERM_STORE_LOCAL_CAP) + CHERI_PERM_STORE +#define CHERI_PERMS_PROT2PERM_WRITE_CAP \ + (CHERI_PERM_STORE_CAP | CHERI_PERM_STORE_LOCAL_CAP) #define CHERI_PERMS_PROT2PERM_EXEC \ (CHERI_PERM_EXECUTE | CHERI_PERM_EXECUTIVE | \ - CHERI_PERMS_PROT2PERM_READ) + CHERI_PERMS_PROT2PERM_READ | CHERI_PERMS_PROT2PERM_READ_CAP) /* * Basic userspace permission mask; CHERI_PERM_EXECUTE will be added for diff --git a/sys/arm64/vmm/vmm.c b/sys/arm64/vmm/vmm.c index 3a4a269ed4ee..f77632b1e607 100644 --- a/sys/arm64/vmm/vmm.c +++ b/sys/arm64/vmm/vmm.c @@ -849,7 +849,7 @@ vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid, * Hide the bits implicitly added by vm_mmap_memseg(). * Userspace might not expect to see them returned here. 
*/ - *prot &= ~VM_PROT_CAP; + *prot &= ~(VM_PROT_CAP | VM_PROT_NO_IMPLY_CAP); } if (flags) *flags = mmnext->flags; diff --git a/sys/cheri/cherireg.h b/sys/cheri/cherireg.h index 2f023bdc3657..d5cd671dacc5 100644 --- a/sys/cheri/cherireg.h +++ b/sys/cheri/cherireg.h @@ -108,7 +108,9 @@ * Definition for mapping vm_prot_t to capability permission */ #define CHERI_PROT2PERM_READ_PERMS CHERI_PERMS_PROT2PERM_READ +#define CHERI_PROT2PERM_READ_CAP_PERMS CHERI_PERMS_PROT2PERM_READ_CAP #define CHERI_PROT2PERM_WRITE_PERMS CHERI_PERMS_PROT2PERM_WRITE +#define CHERI_PROT2PERM_WRITE_CAP_PERMS CHERI_PERMS_PROT2PERM_WRITE_CAP #define CHERI_PROT2PERM_EXEC_PERMS CHERI_PERMS_PROT2PERM_EXEC #define CHERI_PROT2PERM_MASK \ (CHERI_PROT2PERM_READ_PERMS | CHERI_PROT2PERM_WRITE_PERMS | \ diff --git a/sys/compat/linuxkpi/common/include/linux/page.h b/sys/compat/linuxkpi/common/include/linux/page.h index 183578930b8a..7fded17ca7ca 100644 --- a/sys/compat/linuxkpi/common/include/linux/page.h +++ b/sys/compat/linuxkpi/common/include/linux/page.h @@ -50,8 +50,8 @@ typedef unsigned long pgprot_t; #define page vm_page -#define LINUXKPI_PROT_VALID (1 << 5) -#define LINUXKPI_CACHE_MODE_SHIFT 6 +#define LINUXKPI_PROT_VALID (1 << 6) +#define LINUXKPI_CACHE_MODE_SHIFT 7 CTASSERT((VM_PROT_ALL & -LINUXKPI_PROT_VALID) == 0); diff --git a/sys/dev/drm/drmkpi/include/linux/page.h b/sys/dev/drm/drmkpi/include/linux/page.h index 8b59c0991738..53267539a78b 100644 --- a/sys/dev/drm/drmkpi/include/linux/page.h +++ b/sys/dev/drm/drmkpi/include/linux/page.h @@ -49,8 +49,8 @@ typedef unsigned long pgprot_t; #define page vm_page -#define DRMCOMPAT_PROT_VALID (1 << 5) -#define DRMCOMPAT_CACHE_MODE_SHIFT 6 +#define DRMCOMPAT_PROT_VALID (1 << 6) +#define DRMCOMPAT_CACHE_MODE_SHIFT 7 CTASSERT((VM_PROT_ALL & -DRMCOMPAT_PROT_VALID) == 0); diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c index e667a0d8b78e..7481dc74db81 100644 --- a/sys/fs/devfs/devfs_vnops.c +++ b/sys/fs/devfs/devfs_vnops.c @@ -2014,6 
+2014,8 @@ devfs_mmap_f(struct file *fp, vm_map_t map, vm_pointer_t *addr, else if ((prot & VM_PROT_WRITE) != 0) return (EACCES); } + if ((prot & (VM_PROT_CAP | VM_PROT_NO_IMPLY_CAP)) != 0) + maxprot = VM_PROT_ADD_CAP(maxprot); maxprot &= cap_maxprot; fpop = td->td_fpop; diff --git a/sys/riscv/include/cherireg.h b/sys/riscv/include/cherireg.h index d2ad9f88441a..a8435becc5f9 100644 --- a/sys/riscv/include/cherireg.h +++ b/sys/riscv/include/cherireg.h @@ -88,12 +88,16 @@ * vm_prot_t to capability permission bits */ #define CHERI_PERMS_PROT2PERM_READ \ - (CHERI_PERM_LOAD | CHERI_PERM_LOAD_CAP) + CHERI_PERM_LOAD +#define CHERI_PERMS_PROT2PERM_READ_CAP \ + CHERI_PERM_LOAD_CAP #define CHERI_PERMS_PROT2PERM_WRITE \ - (CHERI_PERM_STORE | CHERI_PERM_STORE_CAP | \ - CHERI_PERM_STORE_LOCAL_CAP) + CHERI_PERM_STORE +#define CHERI_PERMS_PROT2PERM_WRITE_CAP \ + (CHERI_PERM_STORE_CAP | CHERI_PERM_STORE_LOCAL_CAP) #define CHERI_PERMS_PROT2PERM_EXEC \ - (CHERI_PERM_EXECUTE | CHERI_PERMS_PROT2PERM_READ) + (CHERI_PERM_EXECUTE | CHERI_PERMS_PROT2PERM_READ | \ + CHERI_PERMS_PROT2PERM_READ_CAP) /* * Hardware defines a kind of tripartite taxonomy: memory, type, and CID. 
diff --git a/sys/sys/mman.h b/sys/sys/mman.h index 30a5b71ad146..80cff6690224 100644 --- a/sys/sys/mman.h +++ b/sys/sys/mman.h @@ -52,8 +52,11 @@ #define PROT_READ 0x01 /* pages can be read */ #define PROT_WRITE 0x02 /* pages can be written */ #define PROT_EXEC 0x04 /* pages can be executed */ +#define PROT_CAP 0x08 /* capabilities can be read/written */ +#define PROT_NO_CAP 0x10 /* honor PROT_CAP absence */ #if __BSD_VISIBLE -#define _PROT_ALL (PROT_READ | PROT_WRITE | PROT_EXEC) +#define _PROT_CAP (PROT_CAP | PROT_NO_CAP) +#define _PROT_ALL (PROT_READ | PROT_WRITE | PROT_EXEC | _PROT_CAP) #define PROT_EXTRACT(prot) ((prot) & _PROT_ALL) #define _PROT_MAX_SHIFT 16 diff --git a/sys/vm/vm.h b/sys/vm/vm.h index 597b25a695e5..4cb3644fc49f 100644 --- a/sys/vm/vm.h +++ b/sys/vm/vm.h @@ -76,8 +76,9 @@ typedef u_char vm_prot_t; /* protection codes */ #define VM_PROT_EXECUTE ((vm_prot_t) 0x04) #define VM_PROT_READ_CAP ((vm_prot_t) 0x08) #define VM_PROT_WRITE_CAP ((vm_prot_t) 0x10) -#define VM_PROT_COPY ((vm_prot_t) 0x20) /* copy-on-read */ -#define VM_PROT_PRIV_FLAG ((vm_prot_t) 0x40) +#define VM_PROT_NO_IMPLY_CAP ((vm_prot_t) 0x20) +#define VM_PROT_COPY ((vm_prot_t) 0x40) /* copy-on-read */ +#define VM_PROT_PRIV_FLAG ((vm_prot_t) 0x80) #define VM_PROT_FAULT_LOOKUP VM_PROT_PRIV_FLAG #define VM_PROT_QUICK_NOFAULT VM_PROT_PRIV_FLAG /* same to save bits */ @@ -86,17 +87,19 @@ typedef u_char vm_prot_t; /* protection codes */ #define VM_PROT_DEFAULT VM_PROT_RWX #define VM_PROT_CAP (VM_PROT_READ_CAP|VM_PROT_WRITE_CAP) #define VM_PROT_RW_CAP (VM_PROT_RW|VM_PROT_CAP) -#define VM_PROT_ALL (VM_PROT_RWX|VM_PROT_CAP) +#define VM_PROT_ALL (VM_PROT_RWX|VM_PROT_CAP|VM_PROT_NO_IMPLY_CAP) #define VM_PROT_ADD_CAP(prot) __extension__ ({ \ vm_prot_t cp, p; \ \ cp = p = (prot); \ - if ((p & VM_PROT_READ) != 0) \ - cp |= VM_PROT_READ_CAP; \ - if ((p & VM_PROT_WRITE) != 0) \ - cp |= VM_PROT_WRITE_CAP; \ - cp; \ + if ((p & (VM_PROT_CAP | VM_PROT_NO_IMPLY_CAP)) == 0) { \ + if ((p & VM_PROT_READ) !=
0) \ + cp |= VM_PROT_READ_CAP; \ + if ((p & VM_PROT_WRITE) != 0) \ + cp |= VM_PROT_WRITE_CAP; \ + } \ + cp |= VM_PROT_NO_IMPLY_CAP; \ }) #define VM_PROT_EXTRACT(prot) ((prot) & VM_PROT_ALL) diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index e78bb4b6119c..8120b78b3a7b 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -2099,7 +2099,7 @@ vm_map_insert1(vm_map_t map, vm_object_t object, vm_ooffset_t offset, new_entry->inheritance = inheritance; new_entry->protection = prot; - new_entry->max_protection = max; + new_entry->max_protection = max | VM_PROT_NO_IMPLY_CAP; new_entry->wired_count = 0; new_entry->wiring_thread = NULL; new_entry->read_ahead = VM_FAULT_READ_AHEAD_INIT; @@ -3463,7 +3463,8 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end, old_prot = entry->protection; if ((flags & VM_MAP_PROTECT_SET_MAXPROT) != 0) { - entry->max_protection = new_maxprot; + entry->max_protection = new_maxprot | + VM_PROT_NO_IMPLY_CAP; entry->protection = new_maxprot & old_prot; } if ((flags & VM_MAP_PROTECT_SET_PROT) != 0) @@ -6228,10 +6229,23 @@ vm_map_prot2perms(vm_prot_t prot) { int perms = 0; - if (prot & (VM_PROT_READ | VM_PROT_COPY)) - perms |= CHERI_PROT2PERM_READ_PERMS; - if (prot & VM_PROT_WRITE) - perms |= CHERI_PROT2PERM_WRITE_PERMS; + if (prot & (VM_PROT_CAP | VM_PROT_NO_IMPLY_CAP)) { + if (prot & (VM_PROT_READ | VM_PROT_COPY)) + perms |= CHERI_PROT2PERM_READ_PERMS; + if (prot & VM_PROT_READ_CAP) + perms |= CHERI_PROT2PERM_READ_CAP_PERMS; + if (prot & VM_PROT_WRITE) + perms |= CHERI_PROT2PERM_WRITE_PERMS; + if (prot & VM_PROT_WRITE_CAP) + perms |= CHERI_PROT2PERM_WRITE_CAP_PERMS; + } else { + if (prot & (VM_PROT_READ | VM_PROT_COPY)) + perms |= CHERI_PROT2PERM_READ_PERMS | + CHERI_PROT2PERM_READ_CAP_PERMS; + if (prot & VM_PROT_WRITE) + perms |= CHERI_PROT2PERM_WRITE_PERMS | + CHERI_PROT2PERM_WRITE_CAP_PERMS; + } if (prot & VM_PROT_EXECUTE) perms |= CHERI_PROT2PERM_EXEC_PERMS; @@ -6276,7 +6290,7 @@ vm_map_reservation_insert(vm_map_t map, vm_offset_t 
addr, vm_size_t length, new_entry->end = addr + length; new_entry->reservation = reservation; new_entry->next_read = addr; - new_entry->max_protection = max; + new_entry->max_protection = max | VM_PROT_NO_IMPLY_CAP; vm_map_entry_link(map, new_entry); vm_map_log("reserve", new_entry); diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 49fb83fa707e..e87e5036b33f 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -234,6 +234,35 @@ vm_wxcheck(struct proc *p, char *call) return (0); } +static inline int +vm_prot2vmprot(vm_prot_t *prot, const char *func, const char *protname) +{ + vm_prot_t vm_prot; + + KASSERT((*prot & ~_PROT_ALL) == 0, ("invalid bits in %s", protname)); + + if ((*prot & PROT_CAP) != 0 && + (*prot & (PROT_READ | PROT_WRITE)) == 0) { + SYSERRCAUSE( + "%s: PROT_CAP in %s without PROT_READ or PROT_WRITE", + func, protname); + return (ENOTSUP); + } + + vm_prot = (*prot & ~_PROT_CAP); + if ((*prot & PROT_CAP) != 0) { + if ((*prot & PROT_READ) != 0) + vm_prot |= VM_PROT_READ_CAP; + if ((*prot & PROT_WRITE) != 0) + vm_prot |= VM_PROT_WRITE_CAP; + } + if ((*prot & PROT_NO_CAP) != 0) + vm_prot |= VM_PROT_NO_IMPLY_CAP; + + *prot = vm_prot; + return (0); +} + /* * Memory Map (mmap) system call. 
Note that the file offset * and address are allowed to be NOT page aligned, though if @@ -417,11 +446,11 @@ kern_mmap_maxprot(struct proc *p, int prot) #endif if ((p->p_flag2 & P2_PROTMAX_DISABLE) != 0 || (p->p_fctl0 & NT_FREEBSD_FCTL_PROTMAX_DISABLE) != 0) - return (_PROT_ALL); + return (PROT_READ | PROT_WRITE | PROT_EXEC); if (((p->p_flag2 & P2_PROTMAX_ENABLE) != 0 || imply_prot_max) && prot != PROT_NONE) return (prot); - return (_PROT_ALL); + return (PROT_READ | PROT_WRITE | PROT_EXEC); } int @@ -435,16 +464,14 @@ kern_mmap(struct thread *td, const struct mmap_req *mrp) vm_pointer_t addr, orig_addr; vm_offset_t max_addr; vm_size_t len, pageoff, size; - vm_prot_t cap_maxprot; - int align, error, fd, flags, max_prot, prot; + vm_prot_t cap_maxprot, cap_prot, max_prot, prot; + int align, error, fd, flags; cap_rights_t rights; mmap_check_fp_fn check_fp_fn; - int cap_prot; orig_addr = addr = mrp->mr_hint; max_addr = mrp->mr_max_addr; len = mrp->mr_len; - prot = mrp->mr_prot; flags = mrp->mr_flags; fd = mrp->mr_fd; pos = mrp->mr_pos; @@ -452,24 +479,38 @@ kern_mmap(struct thread *td, const struct mmap_req *mrp) p = td->td_proc; - if ((prot & ~(_PROT_ALL | PROT_MAX(_PROT_ALL))) != 0) { + if ((mrp->mr_prot & ~(_PROT_ALL | PROT_MAX(_PROT_ALL))) != 0) { SYSERRCAUSE( "%s: invalid bits in prot %x", __func__, - (prot & ~(_PROT_ALL | PROT_MAX(_PROT_ALL)))); + (mrp->mr_prot & ~(_PROT_ALL | PROT_MAX(_PROT_ALL)))); return (EINVAL); } - max_prot = PROT_MAX_EXTRACT(prot); - prot = PROT_EXTRACT(prot); - if (max_prot != 0 && (max_prot & prot) != prot) { - SYSERRCAUSE( - "%s: requested page permissions exceed requested maximum", - __func__); - return (ENOTSUP); + max_prot = PROT_MAX_EXTRACT(mrp->mr_prot); + prot = PROT_EXTRACT(mrp->mr_prot); + /* Ensure max_prot is a superset of prot if non-zero */ + if (max_prot != 0) { + /* + * If prot contains explicit capability permissions then + * max_prot must as well. 
Add PROT_NO_CAP to both to allow + * a simple check that max_prot is a superset of prot. + * Adding to max_prot allows max_prot to contain PROT_CAP + * while prot contains only PROT_NO_CAP. Adding to prot + * ensures that prot doesn't later gain implied permissions + * while max_prot has PROT_NO_CAP and not PROT_CAP. + */ + if ((prot & _PROT_CAP) != 0 || (max_prot & _PROT_CAP) != 0) { + prot |= PROT_NO_CAP; + max_prot |= PROT_NO_CAP; + } + if ((max_prot & prot) != prot) { + SYSERRCAUSE("%s: requested page permissions exceed " + "requested maximum", __func__); + return (ENOTSUP); + } } if ((prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC) && (error = vm_wxcheck(p, "mmap"))) return (error); - /* * Always honor PROT_MAX if set. If not, default to all * permissions unless we're implying maximum permissions. @@ -550,13 +591,26 @@ kern_mmap(struct thread *td, const struct mmap_req *mrp) (prot & ~_PROT_ALL)); return (EINVAL); } - if ((flags & MAP_GUARD) != 0 && (prot != PROT_NONE || fd != -1 || + if ((flags & MAP_GUARD) != 0 && + ((prot != PROT_NONE && prot != PROT_NO_CAP) || fd != -1 || pos != 0 || (flags & ~(MAP_FIXED | MAP_GUARD | MAP_EXCL | MAP_RESERVATION_CREATE | MAP_32BIT | MAP_ALIGNMENT_MASK)) != 0)) { SYSERRCAUSE("%s: Invalid arguments with MAP_GUARD", __func__); return (EINVAL); } + error = vm_prot2vmprot(&prot, "mmap", "prot"); + if (error) + return (error); + error = vm_prot2vmprot(&max_prot, "mmap", "max prot"); + if (error) + return (error); + error = vm_prot2vmprot(&cap_prot, "mmap", "cap_prot"); + if (error) + return (error); + /* + * NB: Beyond this point, all prot flags are normalized to VM_PROT_*. + */ /* * Align the file position to a page boundary, @@ -714,8 +768,6 @@ kern_mmap(struct thread *td, const struct mmap_req *mrp) } else if ((flags & MAP_ANON) != 0) { /* * Mapping blank space is trivial. - * - * This relies on VM_PROT_* matching PROT_*.
*/ error = vm_mmap_object(&vms->vm_map, &addr, max_addr, size, VM_PROT_ADD_CAP(prot), VM_PROT_ADD_CAP(max_prot), flags, @@ -728,13 +780,13 @@ kern_mmap(struct thread *td, const struct mmap_req *mrp) * with maxprot later. */ cap_rights_init_one(&rights, CAP_MMAP); - if (cap_prot & PROT_READ) + if (cap_prot & VM_PROT_READ) cap_rights_set_one(&rights, CAP_MMAP_R); if ((flags & MAP_SHARED) != 0) { - if (cap_prot & PROT_WRITE) + if (cap_prot & VM_PROT_WRITE) cap_rights_set_one(&rights, CAP_MMAP_W); } - if (cap_prot & PROT_EXEC) + if (cap_prot & VM_PROT_EXECUTE) cap_rights_set_one(&rights, CAP_MMAP_X); error = fget_mmap(td, fd, &rights, &cap_maxprot, &fp); if (error != 0) @@ -744,6 +796,8 @@ kern_mmap(struct thread *td, const struct mmap_req *mrp) error = EINVAL; goto done; } + if ((cap_prot & (VM_PROT_READ_CAP | VM_PROT_WRITE_CAP)) != 0) + cap_maxprot = VM_PROT_ADD_CAP(cap_maxprot); if ((cap_prot & cap_maxprot) != cap_prot) { SYSERRCAUSE("%s: unable to map file with " "requested permissions", __func__); @@ -758,7 +812,6 @@ kern_mmap(struct thread *td, const struct mmap_req *mrp) } if (fp->f_ops == &shm_ops && shm_largepage(fp->f_data)) addr = orig_addr; - /* This relies on VM_PROT_* matching PROT_*. 
*/ error = fo_mmap(fp, &vms->vm_map, &addr, max_addr, size, prot, max_prot, flags, pos, td); } @@ -1043,18 +1096,19 @@ sys_mprotect(struct thread *td, struct mprotect_args *uap) } int -kern_mprotect(struct thread *td, uintptr_t addr0, size_t size, int prot, +kern_mprotect(struct thread *td, uintptr_t addr0, size_t size, int userprot, int flags) { vm_offset_t addr; vm_size_t pageoff; - int vm_error, max_prot; + vm_prot_t max_prot, prot; + int error, vm_error; addr = addr0; - if ((prot & ~(_PROT_ALL | PROT_MAX(_PROT_ALL))) != 0) + if ((userprot & ~(_PROT_ALL | PROT_MAX(_PROT_ALL))) != 0) return (EINVAL); - max_prot = PROT_MAX_EXTRACT(prot); - prot = PROT_EXTRACT(prot); + max_prot = PROT_MAX_EXTRACT(userprot); + prot = PROT_EXTRACT(userprot); pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; @@ -1074,10 +1128,22 @@ kern_mprotect(struct thread *td, uintptr_t addr0, size_t size, int prot, flags |= VM_MAP_PROTECT_SET_PROT | VM_MAP_PROTECT_KEEP_CAP; if (max_prot != 0) { + /* see comment in kern_mmap() */ + if ((prot & _PROT_CAP) != 0 || (max_prot & _PROT_CAP) != 0) { + prot |= PROT_NO_CAP; + max_prot |= PROT_NO_CAP; + } if ((max_prot & prot) != prot) return (ENOTSUP); flags |= VM_MAP_PROTECT_SET_MAXPROT; } + error = vm_prot2vmprot(&prot, "mprotect", "prot"); + if (error) + return (error); + error = vm_prot2vmprot(&max_prot, "mprotect", "max prot"); + if (error) + return (error); + vm_error = vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr, addr + size, prot, max_prot, flags); @@ -1873,7 +1939,7 @@ vm_mmap_cdev(struct thread *td, vm_size_t objsize, vm_prot_t *protp, if (dsw->d_flags & D_MMAP_ANON) { *objp = NULL; *foff = 0; - *maxprotp = VM_PROT_ALL; + *maxprotp = VM_PROT_ADD_CAP(VM_PROT_ALL); *protp = VM_PROT_ADD_CAP(*protp); *flagsp |= MAP_ANON; return (0);