From 4b47b0fa4b15e0de916e7dd93cd787fdab208ff2 Mon Sep 17 00:00:00 2001 From: Muhammad Muzammil Date: Fri, 13 Oct 2023 10:31:18 +0500 Subject: [PATCH 01/51] powerpc/bpf: Fixed 'instead' typo in bpf_jit_build_body() Fixed 'instead' typo. Signed-off-by: Muhammad Muzammil Signed-off-by: Michael Ellerman Link: https://msgid.link/20231013053118.11221-1-m.muzzammilashraf@gmail.com --- arch/powerpc/net/bpf_jit_comp32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 7f91ea064c087..bc7f92ec7f2d7 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -940,7 +940,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * !fp->aux->verifier_zext. Emit NOP otherwise. * * Note that "li reg_h,0" is emitted for BPF_B/H/W case, - * if necessary. So, jump there insted of emitting an + * if necessary. So, jump there instead of emitting an * additional "li reg_h,0" instruction. */ if (size == BPF_DW && !fp->aux->verifier_zext) From cc8ee288f484a2a59c01ccd4d8a417d6ed3466e3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:17 +0200 Subject: [PATCH 02/51] powerpc/40x: Remove stale PTE_ATOMIC_UPDATES macro 40x TLB handlers were reworked by commit 2c74e2586bb9 ("powerpc/40x: Rework 40x PTE access and TLB miss") to not require PTE_ATOMIC_UPDATES anymore. Then commit 4e1df545e2fa ("powerpc/pgtable: Drop PTE_ATOMIC_UPDATES") removed all code related to PTE_ATOMIC_UPDATES. Remove left over PTE_ATOMIC_UPDATES macro. Fixes: 2c74e2586bb9 ("powerpc/40x: Rework 40x PTE access and TLB miss") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/f061db5857fcd748f84a6707aad01754686ce97e.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pte-40x.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pte-40x.h b/arch/powerpc/include/asm/nohash/32/pte-40x.h index 6fe46e7545566..0b4e5f8ce3e8a 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-40x.h +++ b/arch/powerpc/include/asm/nohash/32/pte-40x.h @@ -69,9 +69,6 @@ #define _PTE_NONE_MASK 0 -/* Until my rework is finished, 40x still needs atomic PTE updates */ -#define PTE_ATOMIC_UPDATES 1 - #define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED) #define _PAGE_BASE (_PAGE_BASE_NC) From 3b8547ec4d35778c9f4cc261d85c0cae6c1a8ecb Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:18 +0200 Subject: [PATCH 03/51] powerpc: Remove pte_ERROR() pte_ERROR() is used neither in powerpc code nor in common mm code. Remove it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/bec9eb973ecc1cba091e5c9201d877a7797f3242.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/pgtable.h | 3 --- arch/powerpc/include/asm/book3s/64/pgtable.h | 2 -- arch/powerpc/include/asm/nohash/32/pgtable.h | 3 --- arch/powerpc/include/asm/nohash/64/pgtable.h | 2 -- 4 files changed, 10 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 9b13eb14e21bf..543c3691839b7 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -224,9 +224,6 @@ void unmap_kernel_page(unsigned long va); /* Bits to mask out from a PGD to get to the PUD page */ #define PGD_MASKED_BITS 0 -#define pte_ERROR(e) \ - pr_err("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \ - (unsigned long long)pte_val(e)) #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) /* diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 5c497c862d757..7c4ad1e03a49c 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1014,8 +1014,6 @@ static inline pmd_t *pud_pgtable(pud_t pud) return (pmd_t *)__va(pud_val(pud) & ~PUD_MASKED_BITS); } -#define pte_ERROR(e) \ - pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) #define pud_ERROR(e) \ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index f99c53a5f184b..868aecbec8d15 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -55,9 +55,6 @@ extern int icache_44x_need_flush; #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) -#define pte_ERROR(e) \ - pr_err("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \ - (unsigned long long)pte_val(e)) #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index eb6891e34cbde..8083c04a1e6da 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -269,8 +269,6 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, flush_tlb_page(vma, address); } -#define pte_ERROR(e) \ - pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) #define pgd_ERROR(e) \ From 93f81f6eea10f497e892c52998a2194b4e16c91d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:19 +0200 Subject: [PATCH 04/51] powerpc: Deduplicate prototypes of ptep_set_access_flags() and phys_mem_access_prot() Prototypes of ptep_set_access_flags() and phys_mem_access_prot() are identical for book3s and nohash. Deduplicate them. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/b846832cce842a2852615b7356937fb9507e436d.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/pgtable.h | 9 --------- arch/powerpc/include/asm/nohash/pgtable.h | 10 ---------- arch/powerpc/include/asm/pgtable.h | 10 ++++++++++ 3 files changed, 10 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h index 3b7bd36a23210..6f4578daea6c8 100644 --- a/arch/powerpc/include/asm/book3s/pgtable.h +++ b/arch/powerpc/include/asm/book3s/pgtable.h @@ -9,15 +9,6 @@ #endif #ifndef __ASSEMBLY__ -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, - pte_t *ptep, pte_t entry, int dirty); - -struct file; -extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, - unsigned long size, pgprot_t vma_prot); -#define __HAVE_PHYS_MEM_ACCESS_PROT - void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); /* diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index c721478c59347..5b6647fb398b3 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -207,11 +207,6 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, mb(); } - -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, - pte_t *ptep, pte_t entry, int dirty); - /* * Macro to mark a page protection value as "uncacheable". */ @@ -240,11 +235,6 @@ extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addre #define pgprot_writecombine pgprot_noncached_wc -struct file; -extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, - unsigned long size, pgprot_t vma_prot); -#define __HAVE_PHYS_MEM_ACCESS_PROT - #ifdef CONFIG_HUGETLB_PAGE static inline int hugepd_ok(hugepd_t hpd) { diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index d0ee46de248ea..bcdbdeda65d3d 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -110,6 +110,16 @@ void mark_initmem_nx(void); static inline void mark_initmem_nx(void) { } #endif +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, + pte_t *ptep, pte_t entry, int dirty); + +struct file; +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot); +#define __HAVE_PHYS_MEM_ACCESS_PROT + + /* * When used, PTE_FRAG_NR is defined in subarch pgtable.h * so we are sure it is included when arriving here. From da9554e0fe3c7b46912a361a803b50f2655ff30f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:20 +0200 Subject: [PATCH 05/51] powerpc: Refactor update_mmu_cache_range() On nohash, this function voids except for E500 with hugepages. On book3s, this function is for hash MMUs only. Combine those tests and rename E500 update_mmu_cache_range() as __update_mmu_cache() which gets called by update_mmu_cache_range(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/b029842cb6783cbeb43d202e69a90341d65295a4.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/pgtable.h | 24 ----------------------- arch/powerpc/include/asm/nohash/pgtable.h | 15 -------------- arch/powerpc/include/asm/pgtable.h | 19 ++++++++++++++++++ arch/powerpc/mm/nohash/e500_hugetlbpage.c | 3 +-- 4 files changed, 20 insertions(+), 41 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h index 6f4578daea6c8..f42d68c6b3140 100644 --- a/arch/powerpc/include/asm/book3s/pgtable.h +++ b/arch/powerpc/include/asm/book3s/pgtable.h @@ -8,28 +8,4 @@ #include #endif -#ifndef __ASSEMBLY__ -void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); - -/* - * This gets called at the end of handling a page fault, when - * the kernel has put a new PTE into the page table for the process. - * We use it to ensure coherency between the i-cache and d-cache - * for the page which has just been mapped in. - * On machines which use an MMU hash table, we use this to put a - * corresponding HPTE into the hash table ahead of time, instead of - * waiting for the inevitable extra hash-table miss exception. - */ -static inline void update_mmu_cache_range(struct vm_fault *vmf, - struct vm_area_struct *vma, unsigned long address, - pte_t *ptep, unsigned int nr) -{ - if (IS_ENABLED(CONFIG_PPC32) && !mmu_has_feature(MMU_FTR_HPTE_TABLE)) - return; - if (radix_enabled()) - return; - __update_mmu_cache(vma, address, ptep); -} - -#endif /* __ASSEMBLY__ */ #endif diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 5b6647fb398b3..a9056f4fad48d 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -259,20 +259,5 @@ static inline int pud_huge(pud_t pud) #define is_hugepd(hpd) (hugepd_ok(hpd)) #endif -/* - * This gets called at the end of handling a page fault, when - * the kernel has put a new PTE into the page table for the process. - * We use it to ensure coherency between the i-cache and d-cache - * for the page which has just been mapped in. - */ -#if defined(CONFIG_PPC_E500) && defined(CONFIG_HUGETLB_PAGE) -void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, - unsigned long address, pte_t *ptep, unsigned int nr); -#else -static inline void update_mmu_cache_range(struct vm_fault *vmf, - struct vm_area_struct *vma, unsigned long address, - pte_t *ptep, unsigned int nr) {} -#endif - #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index bcdbdeda65d3d..966e7c5119f63 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -119,6 +119,25 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot); #define __HAVE_PHYS_MEM_ACCESS_PROT +void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); + +/* + * This gets called at the end of handling a page fault, when + * the kernel has put a new PTE into the page table for the process. + * We use it to ensure coherency between the i-cache and d-cache + * for the page which has just been mapped in. + * On machines which use an MMU hash table, we use this to put a + * corresponding HPTE into the hash table ahead of time, instead of + * waiting for the inevitable extra hash-table miss exception. + */ +static inline void update_mmu_cache_range(struct vm_fault *vmf, + struct vm_area_struct *vma, unsigned long address, + pte_t *ptep, unsigned int nr) +{ + if ((mmu_has_feature(MMU_FTR_HPTE_TABLE) && !radix_enabled()) || + (IS_ENABLED(CONFIG_PPC_E500) && IS_ENABLED(CONFIG_HUGETLB_PAGE))) + __update_mmu_cache(vma, address, ptep); +} /* * When used, PTE_FRAG_NR is defined in subarch pgtable.h diff --git a/arch/powerpc/mm/nohash/e500_hugetlbpage.c b/arch/powerpc/mm/nohash/e500_hugetlbpage.c index 6b30e40d45903..a134d28a0e4d3 100644 --- a/arch/powerpc/mm/nohash/e500_hugetlbpage.c +++ b/arch/powerpc/mm/nohash/e500_hugetlbpage.c @@ -178,8 +178,7 @@ book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, pte_t pte) * * This must always be called with the pte lock held. */ -void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, - unsigned long address, pte_t *ptep, unsigned int nr) +void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { if (is_vm_hugetlb_page(vma)) book3e_hugetlb_preload(vma, address, *ptep); From d3e01796728add53ab778298573772d44d52d19c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:21 +0200 Subject: [PATCH 06/51] powerpc: Untangle fixmap.h and pgtable.h and mmu.h fixmap.h need pgtable.h for [un]map_kernel_page() pgtable.h need fixmap.h for FIXADDR_TOP. Untangle the two files by moving FIXADDR_TOP into pgtable.h Also move VIRT_IMMR_BASE to fixmap.h to avoid fixmap.h in mmu.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/5eba12392a018be28ad0a02ed844767b132589e7.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/pgtable.h | 9 ++++++++- arch/powerpc/include/asm/book3s/64/pgtable.h | 1 + arch/powerpc/include/asm/fixmap.h | 16 ++++------------ arch/powerpc/include/asm/nohash/32/mmu-8xx.h | 1 - arch/powerpc/include/asm/nohash/32/pgtable.h | 9 ++++++++- arch/powerpc/include/asm/nohash/64/pgtable.h | 1 + arch/powerpc/mm/init_32.c | 1 + arch/powerpc/mm/mem.c | 1 + arch/powerpc/mm/nohash/8xx.c | 2 ++ arch/powerpc/platforms/83xx/misc.c | 2 ++ arch/powerpc/platforms/8xx/cpm1.c | 1 + 11 files changed, 29 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 543c3691839b7..45b69ae2631e0 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -170,7 +170,14 @@ void unmap_kernel_page(unsigned long va); * value (for now) on others, from where we can start layout kernel * virtual space that goes below PKMAP and FIXMAP */ -#include + +#define FIXADDR_SIZE 0 +#ifdef CONFIG_KASAN +#include +#define FIXADDR_TOP (KASAN_SHADOW_START - PAGE_SIZE) +#else +#define FIXADDR_TOP ((unsigned long)(-PAGE_SIZE)) +#endif /* * ioremap_bot starts at that address. Early ioremaps move down from there, diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 7c4ad1e03a49c..dbd545e73161a 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -316,6 +316,7 @@ extern unsigned long pci_io_base; #define IOREMAP_START (ioremap_bot) #define IOREMAP_END (KERN_IO_END - FIXADDR_SIZE) #define FIXADDR_SIZE SZ_32M +#define FIXADDR_TOP (IOREMAP_END + FIXADDR_SIZE) #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index a832aeafe5601..f9068dd8dfce7 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -23,18 +23,6 @@ #include #endif -#ifdef CONFIG_PPC64 -#define FIXADDR_TOP (IOREMAP_END + FIXADDR_SIZE) -#else -#define FIXADDR_SIZE 0 -#ifdef CONFIG_KASAN -#include -#define FIXADDR_TOP (KASAN_SHADOW_START - PAGE_SIZE) -#else -#define FIXADDR_TOP ((unsigned long)(-PAGE_SIZE)) -#endif -#endif - /* * Here we define all the compile-time 'special' virtual * addresses. The point is to have a constant address at @@ -119,5 +107,9 @@ static inline void __set_fixmap(enum fixed_addresses idx, #define __early_set_fixmap __set_fixmap +#ifdef CONFIG_PPC_8xx +#define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE)) +#endif + #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 0e93a4728c9e1..141d82e249a86 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -188,7 +188,6 @@ typedef struct { } mm_context_t; #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000) -#define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE)) /* Page size definitions, common between 32 and 64-bit * diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 868aecbec8d15..c8311ee088116 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -71,7 +71,14 @@ void unmap_kernel_page(unsigned long va); * value (for now) on others, from where we can start layout kernel * virtual space that goes below PKMAP and FIXMAP */ -#include + +#define FIXADDR_SIZE 0 +#ifdef CONFIG_KASAN +#include +#define FIXADDR_TOP (KASAN_SHADOW_START - PAGE_SIZE) +#else +#define FIXADDR_TOP ((unsigned long)(-PAGE_SIZE)) +#endif /* * ioremap_bot starts at that address. Early ioremaps move down from there, diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 8083c04a1e6da..dee3fc654d401 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -57,6 +57,7 @@ #define IOREMAP_START (ioremap_bot) #define IOREMAP_END (KERN_IO_START + KERN_IO_SIZE - FIXADDR_SIZE) #define FIXADDR_SIZE SZ_32M +#define FIXADDR_TOP (IOREMAP_END + FIXADDR_SIZE) /* * Defines the address of the vmemap area, in its own region on diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index d8adc452f4310..4e71dfe7d026d 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -39,6 +39,7 @@ #include #include #include +#include #include diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 8b121df7b08f8..08f3ec9d522b0 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -26,6 +26,7 @@ #include #include #include +#include #include diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index dbbfe897455dc..bb9c39b449d17 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -10,6 +10,8 @@ #include #include +#include + #include #define IMMR_SIZE (FIX_IMMR_SIZE << PAGE_SHIFT) diff --git a/arch/powerpc/platforms/83xx/misc.c b/arch/powerpc/platforms/83xx/misc.c index 2fb2a85d131fd..1135c1ab923cc 100644 --- a/arch/powerpc/platforms/83xx/misc.c +++ b/arch/powerpc/platforms/83xx/misc.c @@ -14,6 +14,8 @@ #include #include #include +#include + #include #include diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c index ebb5f6a27dbf3..b24d4102fbf61 100644 --- a/arch/powerpc/platforms/8xx/cpm1.c +++ b/arch/powerpc/platforms/8xx/cpm1.c @@ -40,6 +40,7 @@ #include #include #include +#include #include From 81fbb9997057b6e6e5795a08d9a8e10e9f48236f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:22 +0200 Subject: [PATCH 07/51] powerpc/nohash: Remove {pte/pmd}_protnone() Only book3s/64 selects ARCH_SUPPORTS_NUMA_BALANCING so CONFIG_NUMA_BALANCING can't be selected on nohash targets. Remove pte_protnone() and pmd_protnone(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/a50c1a19828a8eced82cbcc5c61754b667037d21.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/pgtable.h | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index a9056f4fad48d..ab26af2b421ae 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -35,23 +35,6 @@ static inline bool pte_hashpte(pte_t pte) { return false; } static inline bool pte_ci(pte_t pte) { return pte_val(pte) & _PAGE_NO_CACHE; } static inline bool pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC; } -#ifdef CONFIG_NUMA_BALANCING -/* - * These work without NUMA balancing but the kernel does not care. See the - * comment in include/linux/pgtable.h . On powerpc, this will only - * work for user pages and always return true for kernel pages. - */ -static inline int pte_protnone(pte_t pte) -{ - return pte_present(pte) && !pte_user(pte); -} - -static inline int pmd_protnone(pmd_t pmd) -{ - return pte_protnone(pmd_pte(pmd)); -} -#endif /* CONFIG_NUMA_BALANCING */ - static inline int pte_present(pte_t pte) { return pte_val(pte) & _PAGE_PRESENT; From 7835006979e5415aa4c9bc0e3e7063b5c5943ed4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:23 +0200 Subject: [PATCH 08/51] powerpc/nohash: Refactor declaration of {map/unmap}_kernel_page() map_kernel_page() and unmap_kernel_page() have the same prototypes on nohash/32 and nohash/64, keep only one declaration. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/7fec5f3288cf0d0eac61b1b3f48c3ea54eb80cad.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 8 -------- arch/powerpc/include/asm/nohash/64/pgtable.h | 2 -- arch/powerpc/include/asm/nohash/pgtable.h | 3 +++ arch/powerpc/mm/nohash/book3e_pgtable.c | 2 +- 4 files changed, 4 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index c8311ee088116..26289e4e767c9 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -58,14 +58,6 @@ extern int icache_44x_need_flush; #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) -#ifndef __ASSEMBLY__ - -int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); -void unmap_kernel_page(unsigned long va); - -#endif /* !__ASSEMBLY__ */ - - /* * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary * value (for now) on others, from where we can start layout kernel diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index dee3fc654d401..f5a8e8a9dba46 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -309,8 +309,6 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, /* We borrow MSB 56 (LSB 7) to store the exclusive marker in swap PTEs. */ #define _PAGE_SWP_EXCLUSIVE 0x80 -int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot); -void unmap_kernel_page(unsigned long va); extern int __meminit vmemmap_create_mapping(unsigned long start, unsigned long page_size, unsigned long phys); diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index ab26af2b421ae..3d684b500fe61 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -242,5 +242,8 @@ static inline int pud_huge(pud_t pud) #define is_hugepd(hpd) (hugepd_ok(hpd)) #endif +int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); +void unmap_kernel_page(unsigned long va); + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c index b80fc4a91a534..1c5e4ecbebebf 100644 --- a/arch/powerpc/mm/nohash/book3e_pgtable.c +++ b/arch/powerpc/mm/nohash/book3e_pgtable.c @@ -71,7 +71,7 @@ static void __init *early_alloc_pgtable(unsigned long size) * map_kernel_page adds an entry to the ioremap page table * and adds an entry to the HPT, possibly bolting it */ -int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) +int __ref map_kernel_page(unsigned long ea, phys_addr_t pa, pgprot_t prot) { pgd_t *pgdp; p4d_t *p4dp; From 4c1a89d983be951a3e39d7f9c1d6987f3054e32d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:24 +0200 Subject: [PATCH 09/51] powerpc/nohash: Move 8xx version of pte_update() into pte-8xx.h No point in having 8xx special pte_update() in common header, move it into pte-8xx.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/17e209b1a1a43ed219e9e1f2947ec594ed4f9394.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 57 +------------------- arch/powerpc/include/asm/nohash/32/pte-8xx.h | 57 ++++++++++++++++++++ 2 files changed, 58 insertions(+), 56 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 26289e4e767c9..be8bca42bdce3 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -219,63 +219,8 @@ static inline void pmd_clear(pmd_t *pmdp) * that an executable user mapping was modified, which is needed * to properly flush the virtually tagged instruction cache of * those implementations. - * - * On the 8xx, the page tables are a bit special. For 16k pages, we have - * 4 identical entries. For 512k pages, we have 128 entries as if it was - * 4k pages, but they are flagged as 512k pages for the hardware. - * For other page sizes, we have a single entry in the table. */ -#ifdef CONFIG_PPC_8xx -static pmd_t *pmd_off(struct mm_struct *mm, unsigned long addr); -static int hugepd_ok(hugepd_t hpd); - -static int number_of_cells_per_pte(pmd_t *pmd, pte_basic_t val, int huge) -{ - if (!huge) - return PAGE_SIZE / SZ_4K; - else if (hugepd_ok(*((hugepd_t *)pmd))) - return 1; - else if (IS_ENABLED(CONFIG_PPC_4K_PAGES) && !(val & _PAGE_HUGE)) - return SZ_16K / SZ_4K; - else - return SZ_512K / SZ_4K; -} - -static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, - unsigned long clr, unsigned long set, int huge) -{ - pte_basic_t *entry = (pte_basic_t *)p; - pte_basic_t old = pte_val(*p); - pte_basic_t new = (old & ~(pte_basic_t)clr) | set; - int num, i; - pmd_t *pmd = pmd_off(mm, addr); - - num = number_of_cells_per_pte(pmd, new, huge); - - for (i = 0; i < num; i += PAGE_SIZE / SZ_4K, new += PAGE_SIZE) { - *entry++ = new; - if (IS_ENABLED(CONFIG_PPC_16K_PAGES) && num != 1) { - *entry++ = new; - *entry++ = new; - *entry++ = new; - } - } - - return old; -} - -#ifdef CONFIG_PPC_16K_PAGES -#define ptep_get ptep_get -static inline pte_t ptep_get(pte_t *ptep) -{ - pte_basic_t val = READ_ONCE(ptep->pte); - pte_t pte = {val, val, val, val}; - - return pte; -} -#endif /* CONFIG_PPC_16K_PAGES */ - -#else +#ifndef pte_update static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, unsigned long clr, unsigned long set, int huge) { diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index e6fe1d5731f2c..52395a5ecd707 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -187,6 +187,63 @@ static inline unsigned long pte_leaf_size(pte_t pte) #define pte_leaf_size pte_leaf_size +/* + * On the 8xx, the page tables are a bit special. For 16k pages, we have + * 4 identical entries. For 512k pages, we have 128 entries as if it was + * 4k pages, but they are flagged as 512k pages for the hardware. + * For other page sizes, we have a single entry in the table. + */ +static pmd_t *pmd_off(struct mm_struct *mm, unsigned long addr); +static int hugepd_ok(hugepd_t hpd); + +static inline int number_of_cells_per_pte(pmd_t *pmd, pte_basic_t val, int huge) +{ + if (!huge) + return PAGE_SIZE / SZ_4K; + else if (hugepd_ok(*((hugepd_t *)pmd))) + return 1; + else if (IS_ENABLED(CONFIG_PPC_4K_PAGES) && !(val & _PAGE_HUGE)) + return SZ_16K / SZ_4K; + else + return SZ_512K / SZ_4K; +} + +static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, + unsigned long clr, unsigned long set, int huge) +{ + pte_basic_t *entry = (pte_basic_t *)p; + pte_basic_t old = pte_val(*p); + pte_basic_t new = (old & ~(pte_basic_t)clr) | set; + int num, i; + pmd_t *pmd = pmd_off(mm, addr); + + num = number_of_cells_per_pte(pmd, new, huge); + + for (i = 0; i < num; i += PAGE_SIZE / SZ_4K, new += PAGE_SIZE) { + *entry++ = new; + if (IS_ENABLED(CONFIG_PPC_16K_PAGES) && num != 1) { + *entry++ = new; + *entry++ = new; + *entry++ = new; + } + } + + return old; +} + +#define pte_update pte_update + +#ifdef CONFIG_PPC_16K_PAGES +#define ptep_get ptep_get +static inline pte_t ptep_get(pte_t *ptep) +{ + pte_basic_t val = READ_ONCE(ptep->pte); + pte_t pte = {val, val, val, val}; + + return pte; +} +#endif /* CONFIG_PPC_16K_PAGES */ + #endif #endif /* __KERNEL__ */ From 0f4027eab59261f2fb72586f18efb44be3594dd4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:25 +0200 Subject: [PATCH 10/51] powerpc/nohash: Replace #ifdef CONFIG_44x by IS_ENABLED(CONFIG_44x) in pgtable.h No need of a #ifdef, use IS_ENABLED(CONFIG_44x) Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/7c7d97322a6a05a9842b1e8c4b41265916f542ca.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index be8bca42bdce3..a74476de1ef67 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -9,9 +9,7 @@ #include #include /* For sub-arch specific PPC_PIN_SIZE */ -#ifdef CONFIG_44x extern int icache_44x_need_flush; -#endif #endif /* __ASSEMBLY__ */ @@ -229,10 +227,9 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p *p = __pte(new); -#ifdef CONFIG_44x - if ((old & _PAGE_USER) && (old & _PAGE_EXEC)) + if (IS_ENABLED(CONFIG_44x) && (old & _PAGE_USER) && (old & _PAGE_EXEC)) icache_44x_need_flush = 1; -#endif + return old; } #endif From 42a2722319f0d3d5612ca8efd3ce7d7eae512291 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:26 +0200 Subject: [PATCH 11/51] powerpc/nohash: Refactor pte_update() pte_update() is similar. Take the nohash/32 version which works on nohash/64 and add the debug call to assert_pte_locked() which is only on nohash/64. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/e01cb630cad42f645915ce7702d23985241b71fc.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 33 --------------- arch/powerpc/include/asm/nohash/64/pgtable.h | 17 -------- arch/powerpc/include/asm/nohash/pgtable.h | 42 ++++++++++++++++++++ 3 files changed, 42 insertions(+), 50 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index a74476de1ef67..ae7f3c8afd4fa 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -9,8 +9,6 @@ #include #include /* For sub-arch specific PPC_PIN_SIZE */ -extern int icache_44x_need_flush; - #endif /* __ASSEMBLY__ */ #define PTE_INDEX_SIZE PTE_SHIFT @@ -203,37 +201,6 @@ static inline void pmd_clear(pmd_t *pmdp) *pmdp = __pmd(0); } -/* - * PTE updates. This function is called whenever an existing - * valid PTE is updated. This does -not- include set_pte_at() - * which nowadays only sets a new PTE. - * - * Depending on the type of MMU, we may need to use atomic updates - * and the PTE may be either 32 or 64 bit wide. In the later case, - * when using atomic updates, only the low part of the PTE is - * accessed atomically. - * - * In addition, on 44x, we also maintain a global flag indicating - * that an executable user mapping was modified, which is needed - * to properly flush the virtually tagged instruction cache of - * those implementations. - */ -#ifndef pte_update -static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, - unsigned long clr, unsigned long set, int huge) -{ - pte_basic_t old = pte_val(*p); - pte_basic_t new = (old & ~(pte_basic_t)clr) | set; - - *p = __pte(new); - - if (IS_ENABLED(CONFIG_44x) && (old & _PAGE_USER) && (old & _PAGE_EXEC)) - icache_44x_need_flush = 1; - - return old; -} -#endif - #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int __ptep_test_and_clear_young(struct mm_struct *mm, unsigned long addr, pte_t *ptep) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index f5a8e8a9dba46..b149a39f26853 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -171,23 +171,6 @@ static inline void p4d_set(p4d_t *p4dp, unsigned long val) *p4dp = __p4d(val); } -/* Atomic PTE updates */ -static inline unsigned long pte_update(struct mm_struct *mm, - unsigned long addr, - pte_t *ptep, unsigned long clr, - unsigned long set, - int huge) -{ - unsigned long old = pte_val(*ptep); - *ptep = __pte((old & ~clr) | set); - - /* huge pages use the old page table lock */ - if (!huge) - assert_pte_locked(mm, addr); - - return old; -} - static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 3d684b500fe61..bd5c3a4baabd0 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -2,6 +2,11 @@ #ifndef _ASM_POWERPC_NOHASH_PGTABLE_H #define _ASM_POWERPC_NOHASH_PGTABLE_H +#ifndef __ASSEMBLY__ +static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, + unsigned long clr, unsigned long set, int huge); +#endif + #if defined(CONFIG_PPC64) #include #else @@ -18,6 +23,43 @@ #ifndef __ASSEMBLY__ +extern int icache_44x_need_flush; + +/* + * PTE updates. This function is called whenever an existing + * valid PTE is updated. This does -not- include set_pte_at() + * which nowadays only sets a new PTE. + * + * Depending on the type of MMU, we may need to use atomic updates + * and the PTE may be either 32 or 64 bit wide. In the later case, + * when using atomic updates, only the low part of the PTE is + * accessed atomically. + * + * In addition, on 44x, we also maintain a global flag indicating + * that an executable user mapping was modified, which is needed + * to properly flush the virtually tagged instruction cache of + * those implementations. + */ +#ifndef pte_update +static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, + unsigned long clr, unsigned long set, int huge) +{ + pte_basic_t old = pte_val(*p); + pte_basic_t new = (old & ~(pte_basic_t)clr) | set; + + *p = __pte(new); + + if (IS_ENABLED(CONFIG_44x) && (old & _PAGE_USER) && (old & _PAGE_EXEC)) + icache_44x_need_flush = 1; + + /* huge pages use the old page table lock */ + if (!huge) + assert_pte_locked(mm, addr); + + return old; +} +#endif + /* Generic accessors to PTE bits */ #ifndef pte_write static inline int pte_write(pte_t pte) From 7c929ad0b3167e980a3963e03403a761138a4350 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:27 +0200 Subject: [PATCH 12/51] powerpc/nohash: Refactor checking of no-change in pte_update() On nohash/64, a few callers of pte_update() check if there is really a change in order to avoid an unnecessary write. Refactor that inside pte_update(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/076563e611c2b51036686a8d378bfd5ef1726341.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/64/pgtable.h | 9 --------- arch/powerpc/include/asm/nohash/pgtable.h | 3 +++ 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index b149a39f26853..cba08a62c52cd 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -181,8 +181,6 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, { unsigned long old; - if (!pte_young(*ptep)) - return 0; old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0); return (old & _PAGE_ACCESSED) != 0; } @@ -198,10 +196,6 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - - if ((pte_val(*ptep) & _PAGE_RW) == 0) - return; - pte_update(mm, addr, ptep, _PAGE_RW, 0, 0); } @@ -209,9 +203,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - if ((pte_val(*ptep) & _PAGE_RW) == 0) - return; - pte_update(mm, addr, ptep, _PAGE_RW, 0, 1); } diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index bd5c3a4baabd0..8adaacbbdd1d6 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -47,6 +47,9 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p pte_basic_t old = pte_val(*p); pte_basic_t new = (old & ~(pte_basic_t)clr) | set; + if (new == old) + return old; + *p = __pte(new); if (IS_ENABLED(CONFIG_44x) && (old & _PAGE_USER) && (old & _PAGE_EXEC)) From 27672be7751f25566e69bc228c8b8440a0772f8b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:28 +0200 Subject: [PATCH 13/51] powerpc/nohash: Deduplicate _PAGE_CHG_MASK _PAGE_CHG_MASK is identical between nohash/32 and nohash/64, deduplicate it. While at it, clean the #ifdef for PTE_RPN_MASK in nohash/32 as it is already CONFIG_PPC32. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/c1a1893dbba4afb825bfa78b262f0b0e0fc3b490.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 8 +------- arch/powerpc/include/asm/nohash/64/pgtable.h | 6 ------ arch/powerpc/include/asm/nohash/pgtable.h | 6 ++++++ 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index ae7f3c8afd4fa..a39ecd4980841 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -143,7 +143,7 @@ * The mask covered by the RPN must be a ULL on 32-bit platforms with * 64-bit PTEs. */ -#if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) +#ifdef CONFIG_PTE_64BIT #define PTE_RPN_MASK (~((1ULL << PTE_RPN_SHIFT) - 1)) #define MAX_POSSIBLE_PHYSMEM_BITS 36 #else @@ -151,12 +151,6 @@ #define MAX_POSSIBLE_PHYSMEM_BITS 32 #endif -/* - * _PAGE_CHG_MASK masks of bits that are to be preserved across - * pgprot changes. - */ -#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPECIAL) - #ifndef __ASSEMBLY__ #define pte_clear(mm, addr, ptep) \ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index cba08a62c52cd..34a518a1c04d0 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -75,12 +75,6 @@ #define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1)) -/* - * _PAGE_CHG_MASK masks of bits that are to be preserved across - * pgprot changes. - */ -#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPECIAL) - #define H_PAGE_4K_PFN 0 #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 8adaacbbdd1d6..c64a040f4a6a7 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -13,6 +13,12 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p #include #endif +/* + * _PAGE_CHG_MASK masks of bits that are to be preserved across + * pgprot changes. + */ +#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPECIAL) + /* Permission masks used for kernel mappings */ #define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW) #define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE) From 3a4288164d631b88a57119777b15099eb23c6fbf Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:29 +0200 Subject: [PATCH 14/51] powerpc/nohash: Deduplicate pte helpers Deduplicate following helpers that are identical on nohash/32 and nohash/64: pte_mkwrite_novma() pte_mkdirty() pte_mkyoung() pte_wrprotect() pte_mkexec() pte_young() Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/485f3cbafaa948143f92692e17ca652dfed68da2.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 36 -------------------- arch/powerpc/include/asm/nohash/64/pgtable.h | 25 -------------- arch/powerpc/include/asm/nohash/pgtable.h | 36 ++++++++++++++++++++ 3 files changed, 36 insertions(+), 61 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index a39ecd4980841..de51f78449a05 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -156,37 +156,6 @@ #define pte_clear(mm, addr, ptep) \ do { pte_update(mm, addr, ptep, ~0, 0, 0); } while (0) -#ifndef pte_mkwrite_novma -static inline pte_t pte_mkwrite_novma(pte_t pte) -{ - return __pte(pte_val(pte) | _PAGE_RW); -} -#endif - -static inline pte_t pte_mkdirty(pte_t pte) -{ - return __pte(pte_val(pte) | _PAGE_DIRTY); -} - -static inline pte_t pte_mkyoung(pte_t pte) -{ - return __pte(pte_val(pte) | _PAGE_ACCESSED); -} - -#ifndef pte_wrprotect -static inline pte_t pte_wrprotect(pte_t pte) -{ - return __pte(pte_val(pte) & ~_PAGE_RW); -} -#endif - -#ifndef pte_mkexec -static inline pte_t pte_mkexec(pte_t pte) -{ - return __pte(pte_val(pte) | _PAGE_EXEC); -} -#endif - #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (pmd_val(pmd) & _PMD_BAD) #define pmd_present(pmd) (pmd_val(pmd) & _PMD_PRESENT_MASK) @@ -238,11 +207,6 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, } #endif -static inline int pte_young(pte_t pte) -{ - return pte_val(pte) & _PAGE_ACCESSED; -} - /* * Note that on Book E processors, the pmd contains the kernel virtual * (lowmem) address of the pte page. The physical address is less useful diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 34a518a1c04d0..e8bbc6ec10849 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -80,26 +80,6 @@ #ifndef __ASSEMBLY__ /* pte_clear moved to later in this file */ -static inline pte_t pte_mkwrite_novma(pte_t pte) -{ - return __pte(pte_val(pte) | _PAGE_RW); -} - -static inline pte_t pte_mkdirty(pte_t pte) -{ - return __pte(pte_val(pte) | _PAGE_DIRTY); -} - -static inline pte_t pte_mkyoung(pte_t pte) -{ - return __pte(pte_val(pte) | _PAGE_ACCESSED); -} - -static inline pte_t pte_wrprotect(pte_t pte) -{ - return __pte(pte_val(pte) & ~_PAGE_RW); -} - #define PMD_BAD_BITS (PTE_TABLE_SIZE-1) #define PUD_BAD_BITS (PMD_TABLE_SIZE-1) @@ -165,11 +145,6 @@ static inline void p4d_set(p4d_t *p4dp, unsigned long val) *p4dp = __p4d(val); } -static inline int pte_young(pte_t pte) -{ - return pte_val(pte) & _PAGE_ACCESSED; -} - static inline int __ptep_test_and_clear_young(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index c64a040f4a6a7..21f232d2e34f1 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -70,6 +70,37 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p #endif /* Generic accessors to PTE bits */ +#ifndef pte_mkwrite_novma +static inline pte_t pte_mkwrite_novma(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_RW); +} +#endif + +static inline pte_t pte_mkdirty(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_DIRTY); +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_ACCESSED); +} + +#ifndef pte_wrprotect +static inline pte_t pte_wrprotect(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_RW); +} +#endif + +#ifndef pte_mkexec +static inline pte_t pte_mkexec(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_EXEC); +} +#endif + #ifndef pte_write static inline int pte_write(pte_t pte) { @@ -96,6 +127,11 @@ static inline bool pte_hw_valid(pte_t pte) return pte_val(pte) & _PAGE_PRESENT; } +static inline int pte_young(pte_t pte) +{ + return pte_val(pte) & _PAGE_ACCESSED; +} + /* * Don't just check for any non zero bits in __PAGE_USER, since for book3e * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in From 8c3d9eb323bbf2b37cdc5c01ebf9604175b5970f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:30 +0200 Subject: [PATCH 15/51] powerpc/nohash: Refactor ptep_test_and_clear_young() Remove ptep_test_and_clear_young() macro, make __ptep_test_and_clear_young() common to nohash/32 and nohash/64 and change it to become ptep_test_and_clear_young() Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/de90679ae169104fa3c98d0a8828d7ede228fc52.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 11 ----------- arch/powerpc/include/asm/nohash/64/pgtable.h | 19 +------------------ arch/powerpc/include/asm/nohash/pgtable.h | 11 +++++++++++ 3 files changed, 12 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index de51f78449a05..b7605000bd919 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -164,17 +164,6 @@ static inline void pmd_clear(pmd_t *pmdp) *pmdp = __pmd(0); } -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -static inline int __ptep_test_and_clear_young(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - unsigned long old; - old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0); - return (old & _PAGE_ACCESSED) != 0; -} -#define ptep_test_and_clear_young(__vma, __addr, __ptep) \ - __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep) - #define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index e8bbc6ec10849..56041036fa34e 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -145,22 +145,6 @@ static inline void p4d_set(p4d_t *p4dp, unsigned long val) *p4dp = __p4d(val); } -static inline int __ptep_test_and_clear_young(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - unsigned long old; - - old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0); - return (old & _PAGE_ACCESSED) != 0; -} -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -#define ptep_test_and_clear_young(__vma, __addr, __ptep) \ -({ \ - int __r; \ - __r = __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \ - __r; \ -}) - #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) @@ -178,8 +162,7 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH #define ptep_clear_flush_young(__vma, __address, __ptep) \ ({ \ - int __young = __ptep_test_and_clear_young((__vma)->vm_mm, __address, \ - __ptep); \ + int __young = ptep_test_and_clear_young(__vma, __address, __ptep);\ __young; \ }) diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 21f232d2e34f1..2b043b72f642e 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -69,6 +69,17 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p } #endif +static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + unsigned long old; + + old = pte_update(vma->vm_mm, addr, ptep, _PAGE_ACCESSED, 0, 0); + + return (old & _PAGE_ACCESSED) != 0; +} +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG + /* Generic accessors to PTE bits */ #ifndef pte_mkwrite_novma static inline pte_t pte_mkwrite_novma(pte_t pte) From cc68d77febe055b6499dda5fa13bda976a12a85c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:31 +0200 Subject: [PATCH 16/51] powerpc/nohash: Deduplicate ptep_set_wrprotect() and ptep_get_and_clear() ptep_set_wrprotect() and ptep_get_and_clear are identical for nohash/32 and nohash/64. Make them common. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/ffe46edecdabce915e2d1a4b79a3b2ab770f2248.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 16 ---------------- arch/powerpc/include/asm/nohash/64/pgtable.h | 15 --------------- arch/powerpc/include/asm/nohash/pgtable.h | 16 ++++++++++++++++ 3 files changed, 16 insertions(+), 31 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index b7605000bd919..0be464af4cb1e 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -164,22 +164,6 @@ static inline void pmd_clear(pmd_t *pmdp) *pmdp = __pmd(0); } -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) -{ - return __pte(pte_update(mm, addr, ptep, ~0, 0, 0)); -} - -#define __HAVE_ARCH_PTEP_SET_WRPROTECT -#ifndef ptep_set_wrprotect -static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) -{ - pte_update(mm, addr, ptep, _PAGE_RW, 0, 0); -} -#endif - #ifndef __ptep_set_access_flags static inline void __ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, pte_t entry, diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 56041036fa34e..dc6e35c3a53f3 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -145,13 +145,6 @@ static inline void p4d_set(p4d_t *p4dp, unsigned long val) *p4dp = __p4d(val); } -#define __HAVE_ARCH_PTEP_SET_WRPROTECT -static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) -{ - pte_update(mm, addr, ptep, _PAGE_RW, 0, 0); -} - #define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) @@ -166,14 +159,6 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, __young; \ }) -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0); - return __pte(old); -} - static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t * ptep) { diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 2b043b72f642e..7e810a84ac150 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -80,6 +80,22 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, } #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +#ifndef ptep_set_wrprotect +static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + pte_update(mm, addr, ptep, _PAGE_RW, 0, 0); +} +#endif +#define __HAVE_ARCH_PTEP_SET_WRPROTECT + +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + return __pte(pte_update(mm, addr, ptep, ~0UL, 0, 0)); +} +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR + /* Generic accessors to PTE bits */ #ifndef pte_mkwrite_novma static inline pte_t pte_mkwrite_novma(pte_t pte) From 2ef9f4bb9c47ed30ff3c7961744cae545c034154 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:32 +0200 Subject: [PATCH 17/51] powerpc/nohash: Refactor pte_clear() pte_clear() are doing the same on nohash/32 and nohash/64, Keep the static inline version of nohash/64, make it common and remove the macro version of nohash/32. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/818f7df83d7e9e18f55e274cd3c44f2871ade4dd.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 3 --- arch/powerpc/include/asm/nohash/64/pgtable.h | 7 ------- arch/powerpc/include/asm/nohash/pgtable.h | 5 +++++ 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 0be464af4cb1e..481594097f460 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -153,9 +153,6 @@ #ifndef __ASSEMBLY__ -#define pte_clear(mm, addr, ptep) \ - do { pte_update(mm, addr, ptep, ~0, 0, 0); } while (0) - #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (pmd_val(pmd) & _PMD_BAD) #define pmd_present(pmd) (pmd_val(pmd) & _PMD_PRESENT_MASK) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index dc6e35c3a53f3..b59fbf754f820 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -159,13 +159,6 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, __young; \ }) -static inline void pte_clear(struct mm_struct *mm, unsigned long addr, - pte_t * ptep) -{ - pte_update(mm, addr, ptep, ~0UL, 0, 0); -} - - /* Set the dirty and/or accessed bits atomically in a linux PTE */ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, pte_t entry, diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 7e810a84ac150..b974d1ab8a976 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -96,6 +96,11 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, } #define __HAVE_ARCH_PTEP_GET_AND_CLEAR +static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + pte_update(mm, addr, ptep, ~0UL, 0, 0); +} + /* Generic accessors to PTE bits */ #ifndef pte_mkwrite_novma static inline pte_t pte_mkwrite_novma(pte_t pte) From 799d8836a7c4f4327833e4a5ca952a1700acdb14 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:33 +0200 Subject: [PATCH 18/51] powerpc/nohash: Refactor __ptep_set_access_flags() nohash/32 version of __ptep_set_access_flags() does the same as nohash/64 version, the only difference is that nohash/32 version is more complete and uses pte_update(). Make it common and remove the nohash/64 version. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/e296885df46289d3e5f4cb51efeefe593f76ef24.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 16 ---------------- arch/powerpc/include/asm/nohash/64/pgtable.h | 15 --------------- arch/powerpc/include/asm/nohash/pgtable.h | 17 +++++++++++++++++ 3 files changed, 17 insertions(+), 31 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 481594097f460..9164a9e41b020 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -161,22 +161,6 @@ static inline void pmd_clear(pmd_t *pmdp) *pmdp = __pmd(0); } -#ifndef __ptep_set_access_flags -static inline void __ptep_set_access_flags(struct vm_area_struct *vma, - pte_t *ptep, pte_t entry, - unsigned long address, - int psize) -{ - unsigned long set = pte_val(entry) & - (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); - int huge = psize > mmu_virtual_psize ? 1 : 0; - - pte_update(vma->vm_mm, address, ptep, 0, set, huge); - - flush_tlb_page(vma, address); -} -#endif - /* * Note that on Book E processors, the pmd contains the kernel virtual * (lowmem) address of the pte page. The physical address is less useful diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index b59fbf754f820..36b9bad428ccc 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -159,21 +159,6 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, __young; \ }) -/* Set the dirty and/or accessed bits atomically in a linux PTE */ -static inline void __ptep_set_access_flags(struct vm_area_struct *vma, - pte_t *ptep, pte_t entry, - unsigned long address, - int psize) -{ - unsigned long bits = pte_val(entry) & - (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); - - unsigned long old = pte_val(*ptep); - *ptep = __pte(old | bits); - - flush_tlb_page(vma, address); -} - #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) #define pgd_ERROR(e) \ diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index b974d1ab8a976..c5f21eda2a43b 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -101,6 +101,23 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt pte_update(mm, addr, ptep, ~0UL, 0, 0); } +/* Set the dirty and/or accessed bits atomically in a linux PTE */ +#ifndef __ptep_set_access_flags +static inline void __ptep_set_access_flags(struct vm_area_struct *vma, + pte_t *ptep, pte_t entry, + unsigned long address, + int psize) +{ + unsigned long set = pte_val(entry) & + (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); + int huge = psize > mmu_virtual_psize ? 1 : 0; + + pte_update(vma->vm_mm, address, ptep, 0, set, huge); + + flush_tlb_page(vma, address); +} +#endif + /* Generic accessors to PTE bits */ #ifndef pte_mkwrite_novma static inline pte_t pte_mkwrite_novma(pte_t pte) From 4c8dd6c9872d4e89fd2b3a6fc92fd6cc9cdce347 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:34 +0200 Subject: [PATCH 19/51] powerpc/e500: Simplify pte_mkexec() Commit b6cb20fdc273 ("powerpc/book3e: Fix set_memory_x() and set_memory_nx()") implemented a more elaborated version of pte_mkwrite() suitable for both kernel and user pages. That was needed because set_memory_x() was using pte_mkwrite(). But since commit a4c182ecf335 ("powerpc/set_memory: Avoid spinlock recursion in change_page_attr()") pte_mkwrite() is not used anymore by set_memory_x() so pte_mkwrite() can be simplified as it is only used for user pages. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/cdc822322fe2ff4b0f5ecfde71d09d950b1c7557.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/pte-e500.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/pte-e500.h b/arch/powerpc/include/asm/nohash/pte-e500.h index d8924cbd61e4a..99288e26b6b09 100644 --- a/arch/powerpc/include/asm/nohash/pte-e500.h +++ b/arch/powerpc/include/asm/nohash/pte-e500.h @@ -115,10 +115,7 @@ static inline pte_t pte_mkuser(pte_t pte) static inline pte_t pte_mkexec(pte_t pte) { - if (pte_val(pte) & _PAGE_BAP_UR) - return __pte((pte_val(pte) & ~_PAGE_BAP_SX) | _PAGE_BAP_UX); - else - return __pte((pte_val(pte) & ~_PAGE_BAP_UX) | _PAGE_BAP_SX); + return __pte((pte_val(pte) & ~_PAGE_BAP_SX) | _PAGE_BAP_UX); } #define pte_mkexec pte_mkexec From d3c0dfcfc95796701e82719722fd997ec5256013 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:35 +0200 Subject: [PATCH 20/51] powerpc: Implement and use pgprot_nx() ioremap_page_range() calls pgprot_nx() vmap() and vmap_pfn() clear execute permission by calling pgprot_nx(). When pgprot_nx() is not defined it falls back to a nop. Implement it for powerpc then use it in early_ioremap_range(). Then the call to pte_exprotect() can be removed from ioremap_prot(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/5993a7a097e989af1c97fc4a6c011fefc67dbe6e.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/pgtable.h | 6 ++++++ arch/powerpc/mm/ioremap.c | 5 ++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 966e7c5119f63..2bfb7dd3b49e6 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -71,6 +71,12 @@ static inline pgprot_t pte_pgprot(pte_t pte) return __pgprot(pte_flags); } +static inline pgprot_t pgprot_nx(pgprot_t prot) +{ + return pte_pgprot(pte_exprotect(__pte(pgprot_val(prot)))); +} +#define pgprot_nx pgprot_nx + #ifndef pmd_page_vaddr static inline const void *pmd_page_vaddr(pmd_t pmd) { diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c index 705e8e8ffde4d..d5159f2053800 100644 --- a/arch/powerpc/mm/ioremap.c +++ b/arch/powerpc/mm/ioremap.c @@ -50,8 +50,7 @@ void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long flags) if (pte_write(pte)) pte = pte_mkdirty(pte); - /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ - pte = pte_exprotect(pte); + /* we don't want to let _PAGE_USER leak out */ pte = pte_mkprivileged(pte); if (iowa_is_active()) @@ -66,7 +65,7 @@ int early_ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long i; for (i = 0; i < size; i += PAGE_SIZE) { - int err = map_kernel_page(ea + i, pa + i, prot); + int err = map_kernel_page(ea + i, pa + i, pgprot_nx(prot)); if (WARN_ON_ONCE(err)) /* Should clean up */ return err; From c7263f156395d1f2a2142375a75b7b040686a07a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:36 +0200 Subject: [PATCH 21/51] powerpc: Fail ioremap() instead of silently ignoring flags when PAGE_USER is set Calling ioremap() with _PAGE_USER (or _PAGE_PRIVILEDGE unset) is wrong. Loudly fail the call to ioremap() instead of blindly clearing the flags. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/b6dd5485ad00d2aafd2bb9b7c2c4eac3ebf2cdaf.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/ioremap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c index d5159f2053800..7823c38f09dee 100644 --- a/arch/powerpc/mm/ioremap.c +++ b/arch/powerpc/mm/ioremap.c @@ -51,7 +51,8 @@ void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long flags) pte = pte_mkdirty(pte); /* we don't want to let _PAGE_USER leak out */ - pte = pte_mkprivileged(pte); + if (WARN_ON(pte_user(pte))) + return NULL; if (iowa_is_active()) return iowa_ioremap(addr, size, pte_pgprot(pte), caller); From 69339071bb27f0b1371cd23d6dada3f976261c20 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:37 +0200 Subject: [PATCH 22/51] powerpc: Remove pte_mkuser() and pte_mkpriviledged() pte_mkuser() is never used. Remove it. pte_mkpriviledged() is not used anymore. Remove it too. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/1a1dc18816456c637dc8a9c38d532f7598b60ac4.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/pgtable.h | 10 ---------- arch/powerpc/include/asm/book3s/64/pgtable.h | 10 ---------- arch/powerpc/include/asm/nohash/32/pte-8xx.h | 14 -------------- arch/powerpc/include/asm/nohash/pgtable.h | 14 -------------- arch/powerpc/include/asm/nohash/pte-e500.h | 15 --------------- 5 files changed, 63 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 45b69ae2631e0..80505915c77cf 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -522,16 +522,6 @@ static inline pte_t pte_mkhuge(pte_t pte) return pte; } -static inline pte_t pte_mkprivileged(pte_t pte) -{ - return __pte(pte_val(pte) & ~_PAGE_USER); -} - -static inline pte_t pte_mkuser(pte_t pte) -{ - return __pte(pte_val(pte) | _PAGE_USER); -} - static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index dbd545e73161a..c3b921769ecea 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -630,16 +630,6 @@ static inline pte_t pte_mkdevmap(pte_t pte) return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SPECIAL | _PAGE_DEVMAP)); } -static inline pte_t pte_mkprivileged(pte_t pte) -{ - return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PRIVILEGED)); -} - -static inline pte_t pte_mkuser(pte_t pte) -{ - return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_PRIVILEGED)); -} - /* * This is potentially called with a pmd as the argument, in which case it's not * safe to check _PAGE_DEVMAP unless we also confirm that _PAGE_PTE is set. diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index 52395a5ecd707..843fe0138a663 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -122,20 +122,6 @@ static inline bool pte_user(pte_t pte) #define pte_user pte_user -static inline pte_t pte_mkprivileged(pte_t pte) -{ - return __pte(pte_val(pte) | _PAGE_SH); -} - -#define pte_mkprivileged pte_mkprivileged - -static inline pte_t pte_mkuser(pte_t pte) -{ - return __pte(pte_val(pte) & ~_PAGE_SH); -} - -#define pte_mkuser pte_mkuser - static inline pte_t pte_mkhuge(pte_t pte) { return __pte(pte_val(pte) | _PAGE_SPS | _PAGE_HUGE); diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index c5f21eda2a43b..18c1c9430a864 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -251,20 +251,6 @@ static inline pte_t pte_mkhuge(pte_t pte) } #endif -#ifndef pte_mkprivileged -static inline pte_t pte_mkprivileged(pte_t pte) -{ - return __pte(pte_val(pte) & ~_PAGE_USER); -} -#endif - -#ifndef pte_mkuser -static inline pte_t pte_mkuser(pte_t pte) -{ - return __pte(pte_val(pte) | _PAGE_USER); -} -#endif - static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); diff --git a/arch/powerpc/include/asm/nohash/pte-e500.h b/arch/powerpc/include/asm/nohash/pte-e500.h index 99288e26b6b09..9f9e3f02d4146 100644 --- a/arch/powerpc/include/asm/nohash/pte-e500.h +++ b/arch/powerpc/include/asm/nohash/pte-e500.h @@ -54,7 +54,6 @@ #define _PAGE_KERNEL_RWX (_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY | _PAGE_BAP_SX) #define _PAGE_KERNEL_ROX (_PAGE_BAP_SR | _PAGE_BAP_SX) #define _PAGE_USER (_PAGE_BAP_UR | _PAGE_BAP_SR) /* Can be read */ -#define _PAGE_PRIVILEGED (_PAGE_BAP_SR) #define _PAGE_SPECIAL _PAGE_SW0 @@ -99,20 +98,6 @@ #define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_BAP_UX) #ifndef __ASSEMBLY__ -static inline pte_t pte_mkprivileged(pte_t pte) -{ - return __pte((pte_val(pte) & ~_PAGE_USER) | _PAGE_PRIVILEGED); -} - -#define pte_mkprivileged pte_mkprivileged - -static inline pte_t pte_mkuser(pte_t pte) -{ - return __pte((pte_val(pte) & ~_PAGE_PRIVILEGED) | _PAGE_USER); -} - -#define pte_mkuser pte_mkuser - static inline pte_t pte_mkexec(pte_t pte) { return __pte((pte_val(pte) & ~_PAGE_BAP_SX) | _PAGE_BAP_UX); From a78587473642aec302697cdaceb719a7f8791369 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:38 +0200 Subject: [PATCH 23/51] powerpc: Rely on address instead of pte_user() pte_user() may return 'false' when a user page is PAGE_NONE. In that case it is still a user page and needs to be handled as such. So use is_kernel_addr() instead. And remove "user" text from ptdump as ptdump only dumps kernel tables. Note: no change done for book3s/64 which still has it 'priviledge' bit. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/c778dad89fad07727c31717a9c62f45357c29ebc.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/pgtable.h | 2 +- arch/powerpc/mm/book3s32/mmu.c | 4 ++-- arch/powerpc/mm/nohash/e500.c | 2 +- arch/powerpc/mm/pgtable.c | 22 +++++++++++----------- arch/powerpc/mm/ptdump/8xx.c | 5 ----- arch/powerpc/mm/ptdump/shared.c | 5 ----- 6 files changed, 15 insertions(+), 25 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 18c1c9430a864..942f46e86b4b7 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -58,7 +58,7 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p *p = __pte(new); - if (IS_ENABLED(CONFIG_44x) && (old & _PAGE_USER) && (old & _PAGE_EXEC)) + if (IS_ENABLED(CONFIG_44x) && !is_kernel_addr(addr) && (old & _PAGE_EXEC)) icache_44x_need_flush = 1; /* huge pages use the old page table lock */ diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 850783cfa9c73..d1041c946ce20 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -127,7 +127,7 @@ static void setibat(int index, unsigned long virt, phys_addr_t phys, wimgxpp = (flags & _PAGE_COHERENT) | (_PAGE_EXEC ? BPP_RX : BPP_XX); bat[0].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ bat[0].batl = BAT_PHYS_ADDR(phys) | wimgxpp; - if (flags & _PAGE_USER) + if (!is_kernel_addr(virt)) bat[0].batu |= 1; /* Vp = 1 */ } @@ -280,7 +280,7 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys, wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX; bat[1].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ bat[1].batl = BAT_PHYS_ADDR(phys) | wimgxpp; - if (flags & _PAGE_USER) + if (!is_kernel_addr(virt)) bat[1].batu |= 1; /* Vp = 1 */ if (flags & _PAGE_GUARDED) { /* G bit must be zero in IBATs */ diff --git a/arch/powerpc/mm/nohash/e500.c b/arch/powerpc/mm/nohash/e500.c index 40a4e69ae1a91..5b7d7a932bfd4 100644 --- a/arch/powerpc/mm/nohash/e500.c +++ b/arch/powerpc/mm/nohash/e500.c @@ -122,7 +122,7 @@ static void settlbcam(int index, unsigned long virt, phys_addr_t phys, TLBCAM[index].MAS7 = (u64)phys >> 32; /* Below is unlikely -- only for large user pages or similar */ - if (pte_user(__pte(flags))) { + if (!is_kernel_addr(virt)) { TLBCAM[index].MAS3 |= MAS3_UR; TLBCAM[index].MAS3 |= (flags & _PAGE_EXEC) ? MAS3_UX : 0; TLBCAM[index].MAS3 |= (flags & _PAGE_RW) ? MAS3_UW : 0; diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 3f86fd217690b..781a68c69c2f0 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -46,13 +46,13 @@ static inline int is_exec_fault(void) * and we avoid _PAGE_SPECIAL and cache inhibited pte. We also only do that * on userspace PTEs */ -static inline int pte_looks_normal(pte_t pte) +static inline int pte_looks_normal(pte_t pte, unsigned long addr) { if (pte_present(pte) && !pte_special(pte)) { if (pte_ci(pte)) return 0; - if (pte_user(pte)) + if (!is_kernel_addr(addr)) return 1; } return 0; @@ -79,11 +79,11 @@ static struct folio *maybe_pte_to_folio(pte_t pte) * support falls into the same category. */ -static pte_t set_pte_filter_hash(pte_t pte) +static pte_t set_pte_filter_hash(pte_t pte, unsigned long addr) { pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); - if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) || - cpu_has_feature(CPU_FTR_NOEXECUTE))) { + if (pte_looks_normal(pte, addr) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) || + cpu_has_feature(CPU_FTR_NOEXECUTE))) { struct folio *folio = maybe_pte_to_folio(pte); if (!folio) return pte; @@ -97,7 +97,7 @@ static pte_t set_pte_filter_hash(pte_t pte) #else /* CONFIG_PPC_BOOK3S */ -static pte_t set_pte_filter_hash(pte_t pte) { return pte; } +static pte_t set_pte_filter_hash(pte_t pte, unsigned long addr) { return pte; } #endif /* CONFIG_PPC_BOOK3S */ @@ -105,7 +105,7 @@ static pte_t set_pte_filter_hash(pte_t pte) { return pte; } * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so * instead we "filter out" the exec permission for non clean pages. */ -static inline pte_t set_pte_filter(pte_t pte) +static inline pte_t set_pte_filter(pte_t pte, unsigned long addr) { struct folio *folio; @@ -113,10 +113,10 @@ static inline pte_t set_pte_filter(pte_t pte) return pte; if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) - return set_pte_filter_hash(pte); + return set_pte_filter_hash(pte, addr); /* No exec permission in the first place, move on */ - if (!pte_exec(pte) || !pte_looks_normal(pte)) + if (!pte_exec(pte) || !pte_looks_normal(pte, addr)) return pte; /* If you set _PAGE_EXEC on weird pages you're on your own */ @@ -200,7 +200,7 @@ void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep, * this context might not have been activated yet when this * is called. */ - pte = set_pte_filter(pte); + pte = set_pte_filter(pte, addr); /* Perform the setting of the PTE */ arch_enter_lazy_mmu_mode(); @@ -301,7 +301,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_ */ VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep)); - pte = set_pte_filter(pte); + pte = set_pte_filter(pte, addr); val = pte_val(pte); diff --git a/arch/powerpc/mm/ptdump/8xx.c b/arch/powerpc/mm/ptdump/8xx.c index fac932eb8f9aa..b5c79b11ea3c2 100644 --- a/arch/powerpc/mm/ptdump/8xx.c +++ b/arch/powerpc/mm/ptdump/8xx.c @@ -20,11 +20,6 @@ static const struct flag_info flag_array[] = { #endif .set = "huge", .clear = " ", - }, { - .mask = _PAGE_SH, - .val = 0, - .set = "user", - .clear = " ", }, { .mask = _PAGE_RO | _PAGE_NA, .val = 0, diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c index f884760ca5cfe..5ff101654c45b 100644 --- a/arch/powerpc/mm/ptdump/shared.c +++ b/arch/powerpc/mm/ptdump/shared.c @@ -11,11 +11,6 @@ static const struct flag_info flag_array[] = { { - .mask = _PAGE_USER, - .val = _PAGE_USER, - .set = "user", - .clear = " ", - }, { .mask = _PAGE_RW, .val = 0, .set = "r ", From a5a08dc90f4513d1a78582ec24b687fad01cc843 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:39 +0200 Subject: [PATCH 24/51] powerpc: Refactor permission masks used for __P/__S table and kernel memory flags Prepare a common version of the permission masks that will be based on _PAGE_NA, _PAGE_RO, _PAGE_ROX, _PAGE_RW, _PAGE_RWX that will be defined in platform specific headers in later patches. Put them in a new header pgtable-masks.h which will be included by platforms. And prepare a common version of flags used for mapping kernel memory that will be based on _PAGE_RO, _PAGE_ROX, _PAGE_RW, _PAGE_RWX that will be defined in platform specific headers. Put them in unless _PAGE_KERNEL_RO is already defined so that platform specific definitions can be dismantled one by one. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/d31b59efcdb38e675479563307af891aeadf6ec9.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/pgtable-masks.h | 30 ++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 arch/powerpc/include/asm/pgtable-masks.h diff --git a/arch/powerpc/include/asm/pgtable-masks.h b/arch/powerpc/include/asm/pgtable-masks.h new file mode 100644 index 0000000000000..808a3b9e8fc05 --- /dev/null +++ b/arch/powerpc/include/asm/pgtable-masks.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_PGTABLE_MASKS_H +#define _ASM_POWERPC_PGTABLE_MASKS_H + +#ifndef _PAGE_NA +#define _PAGE_NA 0 +#define _PAGE_RO _PAGE_READ +#define _PAGE_ROX (_PAGE_READ | _PAGE_EXEC) +#define _PAGE_RW (_PAGE_READ | _PAGE_WRITE) +#define _PAGE_RWX (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC) +#endif + +/* Permission flags for kernel mappings */ +#ifndef _PAGE_KERNEL_RO +#define _PAGE_KERNEL_RO _PAGE_RO +#define _PAGE_KERNEL_ROX _PAGE_ROX +#define _PAGE_KERNEL_RW (_PAGE_RW | _PAGE_DIRTY) +#define _PAGE_KERNEL_RWX (_PAGE_RWX | _PAGE_DIRTY) +#endif + +/* Permission masks used to generate the __P and __S table */ +#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_NA) +#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_RW) +#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_RWX) +#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_RO) +#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_ROX) +#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_RO) +#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_ROX) + +#endif /* _ASM_POWERPC_PGTABLE_MASKS_H */ From f9f09b93e80148fc5824afb338c318272abde529 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:40 +0200 Subject: [PATCH 25/51] powerpc/8xx: Use generic permission masks 8xx already has _PAGE_NA and _PAGE_RO. So add _PAGE_ROX, _PAGE_RW and _PAGE_RWX and remove specific permission masks. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/5d0b5ce43485f697313eee4326ddff97157fb219.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pte-8xx.h | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index 843fe0138a663..62c965a4511ab 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -48,6 +48,10 @@ #define _PAGE_HUGE 0x0800 /* Copied to L1 PS bit 29 */ +#define _PAGE_ROX (_PAGE_RO | _PAGE_EXEC) +#define _PAGE_RW 0 +#define _PAGE_RWX _PAGE_EXEC + /* cache related flags non existing on 8xx */ #define _PAGE_COHERENT 0 #define _PAGE_WRITETHRU 0 @@ -77,14 +81,7 @@ #define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE) #define _PAGE_BASE (_PAGE_BASE_NC) -/* Permission masks used to generate the __P and __S table */ -#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_NA) -#define PAGE_SHARED __pgprot(_PAGE_BASE) -#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_EXEC) -#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_RO) -#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_RO | _PAGE_EXEC) -#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_RO) -#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_RO | _PAGE_EXEC) +#include #ifndef __ASSEMBLY__ static inline pte_t pte_wrprotect(pte_t pte) From 58f534623c4d8800c2e5d63da9783530848e570c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:41 +0200 Subject: [PATCH 26/51] powerpc/64s: Use generic permission masks book3s64 need specific masks because it needs _PAGE_PRIVILEGED for PAGE_NONE. book3s64 already has _PAGE_RW and _PAGE_RWX. So add _PAGE_NA, _PAGE_RO and _PAGE_ROX and remove specific permission masks. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/d37a418de52e2c950cad6797e81663b56991366f.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/64/pgtable.h | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index c3b921769ecea..0fd12bdc7b5eb 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -17,6 +17,9 @@ #define _PAGE_EXEC 0x00001 /* execute permission */ #define _PAGE_WRITE 0x00002 /* write access allowed */ #define _PAGE_READ 0x00004 /* read access allowed */ +#define _PAGE_NA _PAGE_PRIVILEGED +#define _PAGE_RO _PAGE_READ +#define _PAGE_ROX (_PAGE_READ | _PAGE_EXEC) #define _PAGE_RW (_PAGE_READ | _PAGE_WRITE) #define _PAGE_RWX (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC) #define _PAGE_PRIVILEGED 0x00008 /* kernel access only */ @@ -136,21 +139,8 @@ #define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED) #define _PAGE_BASE (_PAGE_BASE_NC) -/* Permission masks used to generate the __P and __S table, - * - * Note:__pgprot is defined in arch/powerpc/include/asm/page.h - * - * Write permissions imply read permissions for now (we could make write-only - * pages on BookE but we don't bother for now). Execute permission control is - * possible on platforms that define _PAGE_EXEC - */ -#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_PRIVILEGED) -#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_RW) -#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_RW | _PAGE_EXEC) -#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_READ) -#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC) -#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_READ) -#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC) +#include + /* Radix only, Hash uses PAGE_READONLY_X + execute-only pkey instead */ #define PAGE_EXECONLY __pgprot(_PAGE_BASE | _PAGE_EXEC) From d20506d4728c3b7408e84d9aececbcb78c3061ee Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:42 +0200 Subject: [PATCH 27/51] powerpc/nohash: Add _PAGE_WRITE to supplement _PAGE_RW Several places, _PAGE_RW maps to write permission and don't always imply read. To make it more clear, do as book3s/64 in commit c7d54842deb1 ("powerpc/mm: Use _PAGE_READ to indicate Read access") and use _PAGE_WRITE when more relevant. For the time being _PAGE_WRITE is equivalent to _PAGE_RW but that will change when _PAGE_READ gets added in following patches. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/1f79b88db54d030ada776dc9845e0e88345bfc28.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pte-40x.h | 2 ++ arch/powerpc/include/asm/nohash/32/pte-44x.h | 2 ++ arch/powerpc/include/asm/nohash/32/pte-85xx.h | 2 ++ arch/powerpc/include/asm/nohash/64/pgtable.h | 2 +- arch/powerpc/include/asm/nohash/pgtable.h | 9 ++++++--- arch/powerpc/include/asm/nohash/pte-e500.h | 2 ++ arch/powerpc/kernel/head_40x.S | 12 ++++++------ arch/powerpc/kernel/head_44x.S | 4 ++-- arch/powerpc/kernel/head_85xx.S | 2 +- arch/powerpc/mm/nohash/e500.c | 4 ++-- 10 files changed, 26 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pte-40x.h b/arch/powerpc/include/asm/nohash/32/pte-40x.h index 0b4e5f8ce3e8a..e28ef0f5781e7 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-40x.h +++ b/arch/powerpc/include/asm/nohash/32/pte-40x.h @@ -49,6 +49,8 @@ #define _PAGE_EXEC 0x200 /* hardware: EX permission */ #define _PAGE_ACCESSED 0x400 /* software: R: page referenced */ +#define _PAGE_WRITE _PAGE_RW + /* No page size encoding in the linux PTE */ #define _PAGE_PSIZE 0 diff --git a/arch/powerpc/include/asm/nohash/32/pte-44x.h b/arch/powerpc/include/asm/nohash/32/pte-44x.h index b7ed13cee1378..fc0c075006ead 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-44x.h +++ b/arch/powerpc/include/asm/nohash/32/pte-44x.h @@ -75,6 +75,8 @@ #define _PAGE_NO_CACHE 0x00000400 /* H: I bit */ #define _PAGE_WRITETHRU 0x00000800 /* H: W bit */ +#define _PAGE_WRITE _PAGE_RW + /* No page size encoding in the linux PTE */ #define _PAGE_PSIZE 0 diff --git a/arch/powerpc/include/asm/nohash/32/pte-85xx.h b/arch/powerpc/include/asm/nohash/32/pte-85xx.h index 16451df5ddb05..462acf69e3021 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-85xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-85xx.h @@ -31,6 +31,8 @@ #define _PAGE_WRITETHRU 0x00400 /* H: W bit */ #define _PAGE_SPECIAL 0x00800 /* S: Special page */ +#define _PAGE_WRITE _PAGE_RW + #define _PAGE_KERNEL_RO 0 #define _PAGE_KERNEL_ROX _PAGE_EXEC #define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 36b9bad428ccc..2202c78730e8e 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -149,7 +149,7 @@ static inline void p4d_set(p4d_t *p4dp, unsigned long val) static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_update(mm, addr, ptep, _PAGE_RW, 0, 1); + pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1); } #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 942f46e86b4b7..9a91efa6455b8 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -84,7 +84,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_update(mm, addr, ptep, _PAGE_RW, 0, 0); + pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0); } #endif #define __HAVE_ARCH_PTEP_SET_WRPROTECT @@ -122,6 +122,9 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, #ifndef pte_mkwrite_novma static inline pte_t pte_mkwrite_novma(pte_t pte) { + /* + * write implies read, hence set both + */ return __pte(pte_val(pte) | _PAGE_RW); } #endif @@ -139,7 +142,7 @@ static inline pte_t pte_mkyoung(pte_t pte) #ifndef pte_wrprotect static inline pte_t pte_wrprotect(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_RW); + return __pte(pte_val(pte) & ~_PAGE_WRITE); } #endif @@ -153,7 +156,7 @@ static inline pte_t pte_mkexec(pte_t pte) #ifndef pte_write static inline int pte_write(pte_t pte) { - return pte_val(pte) & _PAGE_RW; + return pte_val(pte) & _PAGE_WRITE; } #endif #ifndef pte_read diff --git a/arch/powerpc/include/asm/nohash/pte-e500.h b/arch/powerpc/include/asm/nohash/pte-e500.h index 9f9e3f02d4146..b775c7d465a41 100644 --- a/arch/powerpc/include/asm/nohash/pte-e500.h +++ b/arch/powerpc/include/asm/nohash/pte-e500.h @@ -55,6 +55,8 @@ #define _PAGE_KERNEL_ROX (_PAGE_BAP_SR | _PAGE_BAP_SX) #define _PAGE_USER (_PAGE_BAP_UR | _PAGE_BAP_SR) /* Can be read */ +#define _PAGE_WRITE _PAGE_RW + #define _PAGE_SPECIAL _PAGE_SW0 /* Base page size */ diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index b32e7b2ebdcfd..9f92f5c5e6aa7 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -316,9 +316,9 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) andc. r9, r9, r11 /* Check permission */ bne 5f - rlwinm r9, r11, 1, _PAGE_RW /* dirty => rw */ - and r9, r9, r11 /* hwwrite = dirty & rw */ - rlwimi r11, r9, 0, _PAGE_RW /* replace rw by hwwrite */ + rlwinm r9, r11, 1, _PAGE_WRITE /* dirty => w */ + and r9, r9, r11 /* hwwrite = dirty & w */ + rlwimi r11, r9, 0, _PAGE_WRITE /* replace w by hwwrite */ /* Create TLB tag. This is the faulting address plus a static * set of bits. These are size, valid, E, U0. @@ -400,9 +400,9 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) andc. r9, r9, r11 /* Check permission */ bne 5f - rlwinm r9, r11, 1, _PAGE_RW /* dirty => rw */ - and r9, r9, r11 /* hwwrite = dirty & rw */ - rlwimi r11, r9, 0, _PAGE_RW /* replace rw by hwwrite */ + rlwinm r9, r11, 1, _PAGE_WRITE /* dirty => w */ + and r9, r9, r11 /* hwwrite = dirty & w */ + rlwimi r11, r9, 0, _PAGE_WRITE /* replace w by hwwrite */ /* Create TLB tag. This is the faulting address plus a static * set of bits. These are size, valid, E, U0. diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index a3197c9f721cd..858dabf844329 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -342,7 +342,7 @@ interrupt_base: mtspr SPRN_MMUCR,r12 /* Mask of required permission bits. Note that while we - * do copy ESR:ST to _PAGE_RW position as trying to write + * do copy ESR:ST to _PAGE_WRITE position as trying to write * to an RO page is pretty common, we don't do it with * _PAGE_DIRTY. We could do it, but it's a fairly rare * event so I'd rather take the overhead when it happens @@ -586,7 +586,7 @@ finish_tlb_load_44x: 4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */ /* Mask of required permission bits. Note that while we - * do copy ESR:ST to _PAGE_RW position as trying to write + * do copy ESR:ST to _PAGE_WRITE position as trying to write * to an RO page is pretty common, we don't do it with * _PAGE_DIRTY. We could do it, but it's a fairly rare * event so I'd rather take the overhead when it happens diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S index 0f1641a31250d..1b122cba557b7 100644 --- a/arch/powerpc/kernel/head_85xx.S +++ b/arch/powerpc/kernel/head_85xx.S @@ -471,7 +471,7 @@ END_BTB_FLUSH_SECTION 4: /* Mask of required permission bits. Note that while we - * do copy ESR:ST to _PAGE_RW position as trying to write + * do copy ESR:ST to _PAGE_WRITE position as trying to write * to an RO page is pretty common, we don't do it with * _PAGE_DIRTY. We could do it, but it's a fairly rare * event so I'd rather take the overhead when it happens diff --git a/arch/powerpc/mm/nohash/e500.c b/arch/powerpc/mm/nohash/e500.c index 5b7d7a932bfd4..921c3521ec113 100644 --- a/arch/powerpc/mm/nohash/e500.c +++ b/arch/powerpc/mm/nohash/e500.c @@ -117,7 +117,7 @@ static void settlbcam(int index, unsigned long virt, phys_addr_t phys, TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0; TLBCAM[index].MAS3 = (phys & MAS3_RPN) | MAS3_SR; - TLBCAM[index].MAS3 |= (flags & _PAGE_RW) ? MAS3_SW : 0; + TLBCAM[index].MAS3 |= (flags & _PAGE_WRITE) ? MAS3_SW : 0; if (mmu_has_feature(MMU_FTR_BIG_PHYS)) TLBCAM[index].MAS7 = (u64)phys >> 32; @@ -125,7 +125,7 @@ static void settlbcam(int index, unsigned long virt, phys_addr_t phys, if (!is_kernel_addr(virt)) { TLBCAM[index].MAS3 |= MAS3_UR; TLBCAM[index].MAS3 |= (flags & _PAGE_EXEC) ? MAS3_UX : 0; - TLBCAM[index].MAS3 |= (flags & _PAGE_RW) ? MAS3_UW : 0; + TLBCAM[index].MAS3 |= (flags & _PAGE_WRITE) ? MAS3_UW : 0; } else { TLBCAM[index].MAS3 |= (flags & _PAGE_EXEC) ? MAS3_SX : 0; } From 8e9bd41e4ce1001f5b89e4c9a69f870f39d56c12 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:43 +0200 Subject: [PATCH 28/51] powerpc/nohash: Replace pte_user() by pte_read() pte_user() is now only used in pte_access_permitted() to check access on vmas. User flag is cleared to make a page unreadable. So rename it pte_read() and remove pte_user() which isn't used anymore. For the time being it checks _PAGE_USER but in the near futur all plateforms will be converted to _PAGE_READ so lets support both for now. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/72cbb5be595e9ef884140def73815ed0b0b37010.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pte-8xx.h | 7 ------- arch/powerpc/include/asm/nohash/pgtable.h | 13 +++++++------ arch/powerpc/mm/ioremap.c | 4 ---- 3 files changed, 7 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index 62c965a4511ab..1ee38befd29a2 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -112,13 +112,6 @@ static inline pte_t pte_mkwrite_novma(pte_t pte) #define pte_mkwrite_novma pte_mkwrite_novma -static inline bool pte_user(pte_t pte) -{ - return !(pte_val(pte) & _PAGE_SH); -} - -#define pte_user pte_user - static inline pte_t pte_mkhuge(pte_t pte) { return __pte(pte_val(pte) | _PAGE_SPS | _PAGE_HUGE); diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 9a91efa6455b8..84b6a160d2fc3 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -159,9 +159,6 @@ static inline int pte_write(pte_t pte) return pte_val(pte) & _PAGE_WRITE; } #endif -#ifndef pte_read -static inline int pte_read(pte_t pte) { return 1; } -#endif static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; } @@ -189,10 +186,14 @@ static inline int pte_young(pte_t pte) * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in * _PAGE_USER. Need to explicitly match _PAGE_BAP_UR bit in that case too. */ -#ifndef pte_user -static inline bool pte_user(pte_t pte) +#ifndef pte_read +static inline bool pte_read(pte_t pte) { +#ifdef _PAGE_READ + return (pte_val(pte) & _PAGE_READ) == _PAGE_READ; +#else return (pte_val(pte) & _PAGE_USER) == _PAGE_USER; +#endif } #endif @@ -207,7 +208,7 @@ static inline bool pte_access_permitted(pte_t pte, bool write) * A read-only access is controlled by _PAGE_USER bit. * We have _PAGE_READ set for WRITE and EXECUTE */ - if (!pte_present(pte) || !pte_user(pte) || !pte_read(pte)) + if (!pte_present(pte) || !pte_read(pte)) return false; if (write && !pte_write(pte)) diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c index 7823c38f09dee..7b0afcabd89f0 100644 --- a/arch/powerpc/mm/ioremap.c +++ b/arch/powerpc/mm/ioremap.c @@ -50,10 +50,6 @@ void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long flags) if (pte_write(pte)) pte = pte_mkdirty(pte); - /* we don't want to let _PAGE_USER leak out */ - if (WARN_ON(pte_user(pte))) - return NULL; - if (iowa_is_active()) return iowa_ioremap(addr, size, pte_pgprot(pte), caller); return __ioremap_caller(addr, size, pte_pgprot(pte), caller); From 48cf93bb168d506a8278a6fb25c2f88c1c93ce6e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:44 +0200 Subject: [PATCH 29/51] powerpc/e500: Introduce _PAGE_READ and remove _PAGE_USER e500 MMU has 6 page protection bits: - R, W, X for supervisor - R, W, X for user It means that it can support X without R. To do that, _PAGE_READ flag is needed. With 32 bits PTE there is no bit available for it in PTE. On the other hand the only real use of _PAGE_USER is to implement PAGE_NONE by clearing _PAGE_USER. As _PAGE_NONE can also be implemented by clearing _PAGE_READ, remove _PAGE_USER and add _PAGE_READ. Move _PAGE_PRESENT into bit 30 so that _PAGE_READ can match SR bit. With 64 bits PTE _PAGE_USER is already the combination of SR and UR so all we need to do is to rename it _PAGE_READ. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/0849ab6bf7ae2af23f94b0457fa40d0ea3983fe4.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pte-85xx.h | 22 ++++--------------- arch/powerpc/include/asm/nohash/pte-e500.h | 20 ++++++++--------- arch/powerpc/kernel/head_85xx.S | 10 ++++----- 3 files changed, 18 insertions(+), 34 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pte-85xx.h b/arch/powerpc/include/asm/nohash/32/pte-85xx.h index 462acf69e3021..653a342d3b253 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-85xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-85xx.h @@ -17,9 +17,9 @@ */ /* Definitions for FSL Book-E Cores */ -#define _PAGE_PRESENT 0x00001 /* S: PTE contains a translation */ -#define _PAGE_USER 0x00002 /* S: User page (maps to UR) */ -#define _PAGE_RW 0x00004 /* S: Write permission (SW) */ +#define _PAGE_READ 0x00001 /* H: Read permission (SR) */ +#define _PAGE_PRESENT 0x00002 /* S: PTE contains a translation */ +#define _PAGE_WRITE 0x00004 /* S: Write permission (SW) */ #define _PAGE_DIRTY 0x00008 /* S: Page dirty */ #define _PAGE_EXEC 0x00010 /* H: SX permission */ #define _PAGE_ACCESSED 0x00020 /* S: Page referenced */ @@ -31,13 +31,6 @@ #define _PAGE_WRITETHRU 0x00400 /* H: W bit */ #define _PAGE_SPECIAL 0x00800 /* S: Special page */ -#define _PAGE_WRITE _PAGE_RW - -#define _PAGE_KERNEL_RO 0 -#define _PAGE_KERNEL_ROX _PAGE_EXEC -#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW) -#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC) - /* No page size encoding in the linux PTE */ #define _PAGE_PSIZE 0 @@ -63,14 +56,7 @@ #define _PAGE_BASE (_PAGE_BASE_NC) #endif -/* Permission masks used to generate the __P and __S table */ -#define PAGE_NONE __pgprot(_PAGE_BASE) -#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) -#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC) -#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) -#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) +#include #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_NOHASH_32_PTE_FSL_85xx_H */ diff --git a/arch/powerpc/include/asm/nohash/pte-e500.h b/arch/powerpc/include/asm/nohash/pte-e500.h index b775c7d465a41..31d2c3ea7df8f 100644 --- a/arch/powerpc/include/asm/nohash/pte-e500.h +++ b/arch/powerpc/include/asm/nohash/pte-e500.h @@ -48,14 +48,19 @@ /* "Higher level" linux bit combinations */ #define _PAGE_EXEC (_PAGE_BAP_SX | _PAGE_BAP_UX) /* .. and was cache cleaned */ -#define _PAGE_RW (_PAGE_BAP_SW | _PAGE_BAP_UW) /* User write permission */ +#define _PAGE_READ (_PAGE_BAP_SR | _PAGE_BAP_UR) /* User read permission */ +#define _PAGE_WRITE (_PAGE_BAP_SW | _PAGE_BAP_UW) /* User write permission */ + #define _PAGE_KERNEL_RW (_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY) #define _PAGE_KERNEL_RO (_PAGE_BAP_SR) #define _PAGE_KERNEL_RWX (_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY | _PAGE_BAP_SX) #define _PAGE_KERNEL_ROX (_PAGE_BAP_SR | _PAGE_BAP_SX) -#define _PAGE_USER (_PAGE_BAP_UR | _PAGE_BAP_SR) /* Can be read */ -#define _PAGE_WRITE _PAGE_RW +#define _PAGE_NA 0 +#define _PAGE_RO _PAGE_READ +#define _PAGE_ROX (_PAGE_READ | _PAGE_BAP_UX) +#define _PAGE_RW (_PAGE_READ | _PAGE_WRITE) +#define _PAGE_RWX (_PAGE_READ | _PAGE_WRITE | _PAGE_BAP_UX) #define _PAGE_SPECIAL _PAGE_SW0 @@ -90,14 +95,7 @@ #define _PAGE_BASE (_PAGE_BASE_NC) #endif -/* Permission masks used to generate the __P and __S table */ -#define PAGE_NONE __pgprot(_PAGE_BASE) -#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) -#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_BAP_UX) -#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_BAP_UX) -#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_BAP_UX) +#include #ifndef __ASSEMBLY__ static inline pte_t pte_mkexec(pte_t pte) diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S index 1b122cba557b7..39724ff5ae1f5 100644 --- a/arch/powerpc/kernel/head_85xx.S +++ b/arch/powerpc/kernel/head_85xx.S @@ -485,10 +485,10 @@ END_BTB_FLUSH_SECTION */ mfspr r12,SPRN_ESR #ifdef CONFIG_PTE_64BIT - li r13,_PAGE_PRESENT + li r13,_PAGE_PRESENT|_PAGE_BAP_SR oris r13,r13,_PAGE_ACCESSED@h #else - li r13,_PAGE_PRESENT|_PAGE_ACCESSED + li r13,_PAGE_PRESENT|_PAGE_READ|_PAGE_ACCESSED #endif rlwimi r13,r12,11,29,29 @@ -783,15 +783,15 @@ BEGIN_MMU_FTR_SECTION mtspr SPRN_MAS7, r10 END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) #else - li r10, (_PAGE_EXEC | _PAGE_PRESENT) + li r10, (_PAGE_EXEC | _PAGE_READ) mr r13, r11 rlwimi r10, r11, 31, 29, 29 /* extract _PAGE_DIRTY into SW */ and r12, r11, r10 - andi. r10, r11, _PAGE_USER /* Test for _PAGE_USER */ + mcrf cr0, cr5 /* Test for user page */ slwi r10, r12, 1 or r10, r10, r12 rlwinm r10, r10, 0, ~_PAGE_EXEC /* Clear SX on user pages */ - iseleq r12, r12, r10 + isellt r12, r10, r12 rlwimi r13, r12, 0, 20, 31 /* Get RPN from PTE, merge w/ perms */ mtspr SPRN_MAS3, r13 #endif From 93820bfeefc4a125a6cedd1ee1a956eeb3eb2580 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:45 +0200 Subject: [PATCH 30/51] powerpc/44x: Introduce _PAGE_READ and remove _PAGE_USER 44x MMU has 6 page protection bits: - R, W, X for supervisor - R, W, X for user It means that it can support X without R. To do that, _PAGE_READ flag is needed but there is no bit available for it in PTE. On the other hand the only real use of _PAGE_USER is to implement PAGE_NONE by clearing _PAGE_USER. As _PAGE_NONE can also be implemented by clearing _PAGE_READ, remove _PAGE_USER and add _PAGE_READ. In order to insert bits in one go during TLB miss, move _PAGE_ACCESSED and put _PAGE_READ just after _PAGE_DIRTY so that _PAGE_DIRTY is copied into SW and _PAGE_READ into SR at once. With that change, 44x now also honors execute-only protection. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/043e17987b260b99b45094138c6cb2e89e63d499.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pte-44x.h | 22 +++--------- arch/powerpc/kernel/head_44x.S | 36 ++++++++++---------- 2 files changed, 22 insertions(+), 36 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pte-44x.h b/arch/powerpc/include/asm/nohash/32/pte-44x.h index fc0c075006ead..8518137252371 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-44x.h +++ b/arch/powerpc/include/asm/nohash/32/pte-44x.h @@ -63,28 +63,21 @@ */ #define _PAGE_PRESENT 0x00000001 /* S: PTE valid */ -#define _PAGE_RW 0x00000002 /* S: Write permission */ +#define _PAGE_WRITE 0x00000002 /* S: Write permission */ #define _PAGE_EXEC 0x00000004 /* H: Execute permission */ -#define _PAGE_ACCESSED 0x00000008 /* S: Page referenced */ +#define _PAGE_READ 0x00000008 /* S: Read permission */ #define _PAGE_DIRTY 0x00000010 /* S: Page dirty */ #define _PAGE_SPECIAL 0x00000020 /* S: Special page */ -#define _PAGE_USER 0x00000040 /* S: User page */ +#define _PAGE_ACCESSED 0x00000040 /* S: Page referenced */ #define _PAGE_ENDIAN 0x00000080 /* H: E bit */ #define _PAGE_GUARDED 0x00000100 /* H: G bit */ #define _PAGE_COHERENT 0x00000200 /* H: M bit */ #define _PAGE_NO_CACHE 0x00000400 /* H: I bit */ #define _PAGE_WRITETHRU 0x00000800 /* H: W bit */ -#define _PAGE_WRITE _PAGE_RW - /* No page size encoding in the linux PTE */ #define _PAGE_PSIZE 0 -#define _PAGE_KERNEL_RO 0 -#define _PAGE_KERNEL_ROX _PAGE_EXEC -#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW) -#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC) - /* TODO: Add large page lowmem mapping support */ #define _PMD_PRESENT 0 #define _PMD_PRESENT_MASK (PAGE_MASK) @@ -107,14 +100,7 @@ #define _PAGE_BASE (_PAGE_BASE_NC) #endif -/* Permission masks used to generate the __P and __S table */ -#define PAGE_NONE __pgprot(_PAGE_BASE) -#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) -#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC) -#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) -#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) +#include #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_NOHASH_32_PTE_44x_H */ diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 858dabf844329..25642e802ed3d 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -314,8 +314,8 @@ interrupt_base: * kernel page tables. */ lis r11, PAGE_OFFSET@h - cmplw r10, r11 - blt+ 3f + cmplw cr7, r10, r11 + blt+ cr7, 3f lis r11, swapper_pg_dir@h ori r11, r11, swapper_pg_dir@l @@ -355,7 +355,7 @@ interrupt_base: * place or can we save a couple of instructions here ? */ mfspr r12,SPRN_ESR - li r13,_PAGE_PRESENT|_PAGE_ACCESSED + li r13,_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_READ rlwimi r13,r12,10,30,30 /* Load the PTE */ @@ -428,8 +428,8 @@ interrupt_base: * kernel page tables. */ lis r11, PAGE_OFFSET@h - cmplw r10, r11 - blt+ 3f + cmplw cr7, r10, r11 + blt+ cr7, 3f lis r11, swapper_pg_dir@h ori r11, r11, swapper_pg_dir@l @@ -515,6 +515,7 @@ interrupt_base: * r11 - PTE high word value * r12 - PTE low word value * r13 - TLB index + * cr7 - Result of comparison with PAGE_OFFSET * MMUCR - loaded with proper value when we get here * Upon exit, we reload everything and RFI. */ @@ -533,11 +534,10 @@ finish_tlb_load_44x: tlbwe r10,r13,PPC44x_TLB_PAGEID /* Write PAGEID */ /* And WS 2 */ - li r10,0xf85 /* Mask to apply from PTE */ - rlwimi r10,r12,29,30,30 /* DIRTY -> SW position */ + li r10,0xf84 /* Mask to apply from PTE */ + rlwimi r10,r12,29,30,31 /* DIRTY,READ -> SW,SR position */ and r11,r12,r10 /* Mask PTE bits to keep */ - andi. r10,r12,_PAGE_USER /* User page ? */ - beq 1f /* nope, leave U bits empty */ + bge cr7,1f /* User page ? no, leave U bits empty */ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */ 1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */ @@ -568,8 +568,8 @@ finish_tlb_load_44x: * kernel page tables. */ lis r11,PAGE_OFFSET@h - cmplw cr0,r10,r11 - blt+ 3f + cmplw cr7,r10,r11 + blt+ cr7,3f lis r11,swapper_pg_dir@h ori r11,r11, swapper_pg_dir@l li r12,0 /* MMUCR = 0 */ @@ -599,7 +599,7 @@ finish_tlb_load_44x: * place or can we save a couple of instructions here ? */ mfspr r12,SPRN_ESR - li r13,_PAGE_PRESENT|_PAGE_ACCESSED + li r13,_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_READ rlwimi r13,r12,10,30,30 /* Load the PTE */ @@ -669,8 +669,8 @@ finish_tlb_load_44x: * kernel page tables. */ lis r11,PAGE_OFFSET@h - cmplw cr0,r10,r11 - blt+ 3f + cmplw cr7,r10,r11 + blt+ cr7,3f lis r11,swapper_pg_dir@h ori r11,r11, swapper_pg_dir@l li r12,0 /* MMUCR = 0 */ @@ -744,6 +744,7 @@ finish_tlb_load_44x: * r11 - PTE high word value * r12 - PTE low word value * r13 - free to use + * cr7 - Result of comparison with PAGE_OFFSET * MMUCR - loaded with proper value when we get here * Upon exit, we reload everything and RFI. */ @@ -753,11 +754,10 @@ finish_tlb_load_47x: tlbwe r11,r13,1 /* And make up word 2 */ - li r10,0xf85 /* Mask to apply from PTE */ - rlwimi r10,r12,29,30,30 /* DIRTY -> SW position */ + li r10,0xf84 /* Mask to apply from PTE */ + rlwimi r10,r12,29,30,31 /* DIRTY,READ -> SW,SR position */ and r11,r12,r10 /* Mask PTE bits to keep */ - andi. r10,r12,_PAGE_USER /* User page ? */ - beq 1f /* nope, leave U bits empty */ + bge cr7,1f /* User page ? no, leave U bits empty */ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */ 1: tlbwe r11,r13,2 From ed815bd3fe9b14a742e2ae094f7f55f70918dbbc Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:46 +0200 Subject: [PATCH 31/51] powerpc/40x: Introduce _PAGE_READ and remove _PAGE_USER _PAGE_USER is used to select the zone. Today zone 0 is kernel and zone 1 is user. To implement _PAGE_NONE, _PAGE_USER is cleared, leading to no access for user but kernel still has access to the page so it's possible for a user application to write in that page by using a kernel function as trampoline. What is really wanted is to have user rights on pages below TASK_SIZE and no user rights on pages above TASK_SIZE. Use zones for that. There are 16 zones so lets use the 4 upper address bits to set the zone and declare zone rights based on TASK_SIZE. Then drop _PAGE_USER and reuse it as _PAGE_READ that will be checked in Data TLB miss handler. That will properly handle PAGE_NONE for both kernel and user. In addition, it partially implements execute-only right. The implementation won't be complete because once a TLB has been loaded via the Instruction TLB miss handler, it will be possible to read the page. But at least it can't be read unless it is executed first. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/2a13e3ba8a5dec43143cc1f9a91ec71ea1529f3c.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pte-40x.h | 20 +++----------------- arch/powerpc/kernel/head_40x.S | 7 ++++--- arch/powerpc/mm/nohash/40x.c | 19 ++++++++++++------- 3 files changed, 19 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pte-40x.h b/arch/powerpc/include/asm/nohash/32/pte-40x.h index e28ef0f5781e7..d759cfd747541 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-40x.h +++ b/arch/powerpc/include/asm/nohash/32/pte-40x.h @@ -42,26 +42,19 @@ #define _PAGE_PRESENT 0x002 /* software: PTE contains a translation */ #define _PAGE_NO_CACHE 0x004 /* I: caching is inhibited */ #define _PAGE_WRITETHRU 0x008 /* W: caching is write-through */ -#define _PAGE_USER 0x010 /* matches one of the zone permission bits */ +#define _PAGE_READ 0x010 /* software: read permission */ #define _PAGE_SPECIAL 0x020 /* software: Special page */ #define _PAGE_DIRTY 0x080 /* software: dirty page */ -#define _PAGE_RW 0x100 /* hardware: WR, anded with dirty in exception */ +#define _PAGE_WRITE 0x100 /* hardware: WR, anded with dirty in exception */ #define _PAGE_EXEC 0x200 /* hardware: EX permission */ #define _PAGE_ACCESSED 0x400 /* software: R: page referenced */ -#define _PAGE_WRITE _PAGE_RW - /* No page size encoding in the linux PTE */ #define _PAGE_PSIZE 0 /* cache related flags non existing on 40x */ #define _PAGE_COHERENT 0 -#define _PAGE_KERNEL_RO 0 -#define _PAGE_KERNEL_ROX _PAGE_EXEC -#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW) -#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC) - #define _PMD_PRESENT 0x400 /* PMD points to page of PTEs */ #define _PMD_PRESENT_MASK _PMD_PRESENT #define _PMD_BAD 0x802 @@ -74,14 +67,7 @@ #define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED) #define _PAGE_BASE (_PAGE_BASE_NC) -/* Permission masks used to generate the __P and __S table */ -#define PAGE_NONE __pgprot(_PAGE_BASE) -#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) -#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC) -#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) -#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) +#include #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_NOHASH_32_PTE_40x_H */ diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 9f92f5c5e6aa7..9fc90410b385c 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -312,7 +312,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */ lwz r11, 0(r11) /* Get Linux PTE */ - li r9, _PAGE_PRESENT | _PAGE_ACCESSED + li r9, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_READ andc. r9, r9, r11 /* Check permission */ bne 5f @@ -561,10 +561,11 @@ finish_tlb_load: /* * Clear out the software-only bits in the PTE to generate the * TLB_DATA value. These are the bottom 2 bits of the RPM, the - * top 3 bits of the zone field, and M. + * 4 bits of the zone field, and M. */ - li r9, 0x0ce2 + li r9, 0x0cf2 andc r11, r11, r9 + rlwimi r11, r10, 8, 24, 27 /* Copy 4 upper address bit into zone */ /* load the next available TLB index. */ lwz r9, tlb_4xx_index@l(0) diff --git a/arch/powerpc/mm/nohash/40x.c b/arch/powerpc/mm/nohash/40x.c index 3684d6e570fb5..e835e80c09dba 100644 --- a/arch/powerpc/mm/nohash/40x.c +++ b/arch/powerpc/mm/nohash/40x.c @@ -48,20 +48,25 @@ */ void __init MMU_init_hw(void) { + int i; + unsigned long zpr; + /* * The Zone Protection Register (ZPR) defines how protection will - * be applied to every page which is a member of a given zone. At - * present, we utilize only two of the 4xx's zones. + * be applied to every page which is a member of a given zone. * The zone index bits (of ZSEL) in the PTE are used for software - * indicators, except the LSB. For user access, zone 1 is used, - * for kernel access, zone 0 is used. We set all but zone 1 - * to zero, allowing only kernel access as indicated in the PTE. - * For zone 1, we set a 01 binary (a value of 10 will not work) + * indicators. We use the 4 upper bits of virtual address to select + * the zone. We set all zones above TASK_SIZE to zero, allowing + * only kernel access as indicated in the PTE. For zones below + * TASK_SIZE, we set a 01 binary (a value of 10 will not work) * to allow user access as indicated in the PTE. This also allows * kernel access as indicated in the PTE. */ - mtspr(SPRN_ZPR, 0x10000000); + for (i = 0, zpr = 0; i < TASK_SIZE >> 28; i++) + zpr |= 1 << (30 - i * 2); + + mtspr(SPRN_ZPR, zpr); flush_instruction_cache(); From 46ebef51fd92f52ba7dca21d3c4332e4127de515 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:47 +0200 Subject: [PATCH 32/51] powerpc/32s: Add _PAGE_WRITE to supplement _PAGE_RW Several places, _PAGE_RW maps to write permission and don't always imply read. To make it more clear, do as book3s/64 in commit c7d54842deb1 ("powerpc/mm: Use _PAGE_READ to indicate Read access") and use _PAGE_WRITE when more relevant. For the time being _PAGE_WRITE is equivalent to _PAGE_RW but that will change when _PAGE_READ gets added in following patches. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/5798782869fe4d2698f104948dabd17657b89395.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/pgtable.h | 15 ++++++++++++--- arch/powerpc/kernel/head_book3s_32.S | 6 +++--- arch/powerpc/mm/book3s32/hash_low.S | 12 ++++++------ arch/powerpc/mm/book3s32/mmu.c | 2 +- 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 80505915c77cf..480ad6b4fd6fe 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -31,6 +31,8 @@ #define _PAGE_RW 0x400 /* software: user write access allowed */ #define _PAGE_SPECIAL 0x800 /* software: Special page */ +#define _PAGE_WRITE _PAGE_RW + #ifdef CONFIG_PTE_64BIT /* We never clear the high word of the pte */ #define _PTE_NONE_MASK (0xffffffff00000000ULL | _PAGE_HASHPTE) @@ -347,7 +349,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_update(mm, addr, ptep, _PAGE_RW, 0, 0); + pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0); } static inline void __ptep_set_access_flags(struct vm_area_struct *vma, @@ -406,7 +408,11 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) } /* Generic accessors to PTE bits */ -static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);} +static inline bool pte_write(pte_t pte) +{ + return !!(pte_val(pte) & _PAGE_WRITE); +} + static inline int pte_read(pte_t pte) { return 1; } static inline int pte_dirty(pte_t pte) { return !!(pte_val(pte) & _PAGE_DIRTY); } static inline int pte_young(pte_t pte) { return !!(pte_val(pte) & _PAGE_ACCESSED); } @@ -469,7 +475,7 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) /* Generic modifiers for PTE bits */ static inline pte_t pte_wrprotect(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_RW); + return __pte(pte_val(pte) & ~_PAGE_WRITE); } static inline pte_t pte_exprotect(pte_t pte) @@ -499,6 +505,9 @@ static inline pte_t pte_mkpte(pte_t pte) static inline pte_t pte_mkwrite_novma(pte_t pte) { + /* + * write implies read, hence set both + */ return __pte(pte_val(pte) | _PAGE_RW); } diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 6764b98ca360f..615d429d7bd10 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -503,9 +503,9 @@ DataLoadTLBMiss: andc. r1,r1,r0 /* check access & ~permission */ bne- DataAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */ + rlwinm r1,r0,32-9,30,30 /* _PAGE_WRITE -> PP msb */ rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ - rlwimi r1,r0,32-3,24,24 /* _PAGE_RW -> _PAGE_DIRTY */ + rlwimi r1,r0,32-3,24,24 /* _PAGE_WRITE -> _PAGE_DIRTY */ rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */ xori r1,r1,_PAGE_DIRTY /* clear dirty when not rw */ ori r1,r1,0xe04 /* clear out reserved bits */ @@ -689,7 +689,7 @@ hash_page_dsi: mfdar r4 mfsrr0 r5 mfsrr1 r9 - rlwinm r3, r3, 32 - 15, _PAGE_RW /* DSISR_STORE -> _PAGE_RW */ + rlwinm r3, r3, 32 - 15, _PAGE_WRITE /* DSISR_STORE -> _PAGE_WRITE */ bl hash_page mfspr r10, SPRN_SPRG_THREAD restore_regs_thread r10 diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index 8b804e1a9fa44..acb0584c174cb 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -37,7 +37,7 @@ /* * Load a PTE into the hash table, if possible. * The address is in r4, and r3 contains an access flag: - * _PAGE_RW (0x400) if a write. + * _PAGE_WRITE (0x400) if a write. * r9 contains the SRR1 value, from which we use the MSR_PR bit. * SPRG_THREAD contains the physical address of the current task's thread. * @@ -113,15 +113,15 @@ _GLOBAL(hash_page) lwarx r6,0,r8 /* get linux-style pte, flag word */ #ifdef CONFIG_PPC_KUAP mfsrin r5,r4 - rlwinm r0,r9,28,_PAGE_RW /* MSR[PR] => _PAGE_RW */ - rlwinm r5,r5,12,_PAGE_RW /* Ks => _PAGE_RW */ + rlwinm r0,r9,28,_PAGE_WRITE /* MSR[PR] => _PAGE_WRITE */ + rlwinm r5,r5,12,_PAGE_WRITE /* Ks => _PAGE_WRITE */ andc r5,r5,r0 /* Ks & ~MSR[PR] */ - andc r5,r6,r5 /* Clear _PAGE_RW when Ks = 1 && MSR[PR] = 0 */ + andc r5,r6,r5 /* Clear _PAGE_WRITE when Ks = 1 && MSR[PR] = 0 */ andc. r5,r3,r5 /* check access & ~permission */ #else andc. r5,r3,r6 /* check access & ~permission */ #endif - rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */ + rlwinm r0,r3,32-3,24,24 /* _PAGE_WRITE access -> _PAGE_DIRTY */ ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE #ifdef CONFIG_SMP bne- .Lhash_page_out /* return if access not permitted */ @@ -307,7 +307,7 @@ Hash_msk = (((1 << Hash_bits) - 1) * 64) __REF _GLOBAL(create_hpte) /* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */ - rlwinm r8,r5,32-9,30,30 /* _PAGE_RW -> PP msb */ + rlwinm r8,r5,32-9,30,30 /* _PAGE_WRITE -> PP msb */ rlwinm r0,r5,32-6,30,30 /* _PAGE_DIRTY -> PP msb */ and r8,r8,r0 /* writable if _RW & _DIRTY */ rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */ diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index d1041c946ce20..5445587bfe841 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -277,7 +277,7 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys, /* Do DBAT first */ wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT | _PAGE_GUARDED); - wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX; + wimgxpp |= (flags & _PAGE_WRITE) ? BPP_RW : BPP_RX; bat[1].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ bat[1].batl = BAT_PHYS_ADDR(phys) | wimgxpp; if (!is_kernel_addr(virt)) From bac4cffc7c4a009cf0d16f1785a275e0a7715e8d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:48 +0200 Subject: [PATCH 33/51] powerpc/32s: Introduce _PAGE_READ and remove _PAGE_USER On 603 MMU, TLB missed are handled by SW and there are separated DTLB and ITLB. It is therefore possible to implement execute-only protection by not loading DTLB when read access is not permitted. To do that, _PAGE_READ flag is needed but there is no bit available for it in PTE. On the other hand the only real use of _PAGE_USER is to implement PAGE_NONE by clearing _PAGE_USER. As _PAGE_NONE can also be implemented by clearing _PAGE_READ, remove _PAGE_USER and add _PAGE_READ. Then use the virtual address to know whether user rights or kernel rights are to be used. With that change, 603 MMU now honors execute-only protection. For hash (604) MMU it is more tricky because hash table is common to load/store and execute. Nevertheless it is still possible to check whether _PAGE_READ is set before loading hash table for a load/store access. At least it can't be read unless it is executed first. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/b7702dd5a041ec59055ed2880f4952e94c087a2e.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/pgtable.h | 48 ++++----------- arch/powerpc/kernel/head_book3s_32.S | 61 +++++++++++--------- arch/powerpc/mm/book3s32/hash_low.S | 22 ++++--- 3 files changed, 60 insertions(+), 71 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 480ad6b4fd6fe..244621c88510e 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -20,7 +20,7 @@ #define _PAGE_PRESENT 0x001 /* software: pte contains a translation */ #define _PAGE_HASHPTE 0x002 /* hash_page has made an HPTE for this pte */ -#define _PAGE_USER 0x004 /* usermode access allowed */ +#define _PAGE_READ 0x004 /* software: read access allowed */ #define _PAGE_GUARDED 0x008 /* G: prohibit speculative access */ #define _PAGE_COHERENT 0x010 /* M: enforce memory coherence (SMP systems) */ #define _PAGE_NO_CACHE 0x020 /* I: cache inhibit */ @@ -28,11 +28,9 @@ #define _PAGE_DIRTY 0x080 /* C: page changed */ #define _PAGE_ACCESSED 0x100 /* R: page referenced */ #define _PAGE_EXEC 0x200 /* software: exec allowed */ -#define _PAGE_RW 0x400 /* software: user write access allowed */ +#define _PAGE_WRITE 0x400 /* software: user write access allowed */ #define _PAGE_SPECIAL 0x800 /* software: Special page */ -#define _PAGE_WRITE _PAGE_RW - #ifdef CONFIG_PTE_64BIT /* We never clear the high word of the pte */ #define _PTE_NONE_MASK (0xffffffff00000000ULL | _PAGE_HASHPTE) @@ -44,26 +42,13 @@ #define _PMD_PRESENT_MASK (PAGE_MASK) #define _PMD_BAD (~PAGE_MASK) -/* We borrow the _PAGE_USER bit to store the exclusive marker in swap PTEs. */ -#define _PAGE_SWP_EXCLUSIVE _PAGE_USER +/* We borrow the _PAGE_READ bit to store the exclusive marker in swap PTEs. */ +#define _PAGE_SWP_EXCLUSIVE _PAGE_READ /* And here we include common definitions */ -#define _PAGE_KERNEL_RO 0 -#define _PAGE_KERNEL_ROX (_PAGE_EXEC) -#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW) -#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC) - #define _PAGE_HPTEFLAGS _PAGE_HASHPTE -#ifndef __ASSEMBLY__ - -static inline bool pte_user(pte_t pte) -{ - return pte_val(pte) & _PAGE_USER; -} -#endif /* __ASSEMBLY__ */ - /* * Location of the PFN in the PTE. Most 32-bit platforms use the same * as _PAGE_SHIFT here (ie, naturally aligned). @@ -99,20 +84,7 @@ static inline bool pte_user(pte_t pte) #define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED) #define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT) -/* - * Permission masks used to generate the __P and __S table. - * - * Note:__pgprot is defined in arch/powerpc/include/asm/page.h - * - * Write permissions imply read permissions for now. - */ -#define PAGE_NONE __pgprot(_PAGE_BASE) -#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) -#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC) -#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) -#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) +#include /* Permission masks used for kernel mappings */ #define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW) @@ -408,12 +380,16 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) } /* Generic accessors to PTE bits */ +static inline bool pte_read(pte_t pte) +{ + return !!(pte_val(pte) & _PAGE_READ); +} + static inline bool pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_WRITE); } -static inline int pte_read(pte_t pte) { return 1; } static inline int pte_dirty(pte_t pte) { return !!(pte_val(pte) & _PAGE_DIRTY); } static inline int pte_young(pte_t pte) { return !!(pte_val(pte) & _PAGE_ACCESSED); } static inline int pte_special(pte_t pte) { return !!(pte_val(pte) & _PAGE_SPECIAL); } @@ -448,10 +424,10 @@ static inline bool pte_ci(pte_t pte) static inline bool pte_access_permitted(pte_t pte, bool write) { /* - * A read-only access is controlled by _PAGE_USER bit. + * A read-only access is controlled by _PAGE_READ bit. * We have _PAGE_READ set for WRITE and EXECUTE */ - if (!pte_present(pte) || !pte_user(pte) || !pte_read(pte)) + if (!pte_present(pte) || !pte_read(pte)) return false; if (write && !pte_write(pte)) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 615d429d7bd10..c1d89764dd222 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -412,10 +412,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) . = INTERRUPT_INST_TLB_MISS_603 InstructionTLBMiss: /* - * r0: scratch + * r0: userspace flag (later scratch) * r1: linux style pte ( later becomes ppc hardware pte ) * r2: ptr to linux-style pte - * r3: scratch + * r3: fault address */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_IMISS @@ -424,12 +424,13 @@ InstructionTLBMiss: cmplw 0,r1,r3 #endif mfspr r2, SPRN_SDR1 - li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC | _PAGE_USER + li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC rlwinm r2, r2, 28, 0xfffff000 #ifdef CONFIG_MODULES + li r0, 3 bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC + li r0, 0 addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ #endif 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ @@ -437,13 +438,15 @@ InstructionTLBMiss: rlwinm. r2,r2,0,0,19 /* extract address of pte page */ beq- InstructionAddressInvalid /* return if no mapping */ rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ - lwz r0,0(r2) /* get linux-style pte */ - andc. r1,r1,r0 /* check access & ~permission */ + lwz r2,0(r2) /* get linux-style pte */ + andc. r1,r1,r2 /* check access & ~permission */ bne- InstructionAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ +#ifdef CONFIG_MODULES + rlwimi r2, r0, 0, 31, 31 /* userspace ? -> PP lsb */ +#endif ori r1, r1, 0xe06 /* clear out reserved bits */ - andc r1, r0, r1 /* PP = user? 1 : 0 */ + andc r1, r2, r1 /* PP = user? 1 : 0 */ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) @@ -478,38 +481,38 @@ InstructionAddressInvalid: . = INTERRUPT_DATA_LOAD_TLB_MISS_603 DataLoadTLBMiss: /* - * r0: scratch + * r0: userspace flag (later scratch) * r1: linux style pte ( later becomes ppc hardware pte ) * r2: ptr to linux-style pte - * r3: scratch + * r3: fault address */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_DMISS lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SDR1 - li r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER + li r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_READ rlwinm r2, r2, 28, 0xfffff000 + li r0, 3 bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - li r1, _PAGE_PRESENT | _PAGE_ACCESSED + li r0, 0 addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ lwz r2,0(r2) /* get pmd entry */ rlwinm. r2,r2,0,0,19 /* extract address of pte page */ beq- DataAddressInvalid /* return if no mapping */ rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ - lwz r0,0(r2) /* get linux-style pte */ - andc. r1,r1,r0 /* check access & ~permission */ + lwz r2,0(r2) /* get linux-style pte */ + andc. r1,r1,r2 /* check access & ~permission */ bne- DataAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwinm r1,r0,32-9,30,30 /* _PAGE_WRITE -> PP msb */ - rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ - rlwimi r1,r0,32-3,24,24 /* _PAGE_WRITE -> _PAGE_DIRTY */ - rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */ + rlwinm r1,r2,32-9,30,30 /* _PAGE_WRITE -> PP msb */ + rlwimi r2,r0,0,30,31 /* userspace ? -> PP */ + rlwimi r1,r2,32-3,24,24 /* _PAGE_WRITE -> _PAGE_DIRTY */ xori r1,r1,_PAGE_DIRTY /* clear dirty when not rw */ ori r1,r1,0xe04 /* clear out reserved bits */ - andc r1,r0,r1 /* PP = user? rw? 1: 3: 0 */ + andc r1,r2,r1 /* PP = user? rw? 1: 3: 0 */ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) @@ -558,34 +561,35 @@ DataAddressInvalid: . = INTERRUPT_DATA_STORE_TLB_MISS_603 DataStoreTLBMiss: /* - * r0: scratch + * r0: userspace flag (later scratch) * r1: linux style pte ( later becomes ppc hardware pte ) * r2: ptr to linux-style pte - * r3: scratch + * r3: fault address */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_DMISS lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SDR1 - li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER + li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED rlwinm r2, r2, 28, 0xfffff000 + li r0, 3 bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED + li r0, 0 addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ lwz r2,0(r2) /* get pmd entry */ rlwinm. r2,r2,0,0,19 /* extract address of pte page */ beq- DataAddressInvalid /* return if no mapping */ rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ - lwz r0,0(r2) /* get linux-style pte */ - andc. r1,r1,r0 /* check access & ~permission */ + lwz r2,0(r2) /* get linux-style pte */ + andc. r1,r1,r2 /* check access & ~permission */ bne- DataAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ + rlwimi r2,r0,0,31,31 /* userspace ? -> PP lsb */ li r1,0xe06 /* clear out reserved bits & PP msb */ - andc r1,r0,r1 /* PP = user? 1: 0 */ + andc r1,r2,r1 /* PP = user? 1: 0 */ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) @@ -690,6 +694,7 @@ hash_page_dsi: mfsrr0 r5 mfsrr1 r9 rlwinm r3, r3, 32 - 15, _PAGE_WRITE /* DSISR_STORE -> _PAGE_WRITE */ + ori r3, r3, _PAGE_PRESENT | _PAGE_READ bl hash_page mfspr r10, SPRN_SPRG_THREAD restore_regs_thread r10 @@ -699,7 +704,7 @@ hash_page_isi: mr r11, r10 mfspr r10, SPRN_SPRG_THREAD save_regs_thread r10 - li r3, 0 + li r3, _PAGE_PRESENT | _PAGE_EXEC lwz r4, SRR0(r10) lwz r9, SRR1(r10) bl hash_page diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index acb0584c174cb..4ed0efd03db54 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -36,8 +36,9 @@ /* * Load a PTE into the hash table, if possible. - * The address is in r4, and r3 contains an access flag: - * _PAGE_WRITE (0x400) if a write. + * The address is in r4, and r3 contains required access flags: + * - For ISI: _PAGE_PRESENT | _PAGE_EXEC + * - For DSI: _PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE if a write. * r9 contains the SRR1 value, from which we use the MSR_PR bit. * SPRG_THREAD contains the physical address of the current task's thread. * @@ -67,12 +68,16 @@ _GLOBAL(hash_page) lis r0, TASK_SIZE@h /* check if kernel address */ cmplw 0,r4,r0 mfspr r8,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ - ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */ lwz r5,PGDIR(r8) /* virt page-table root */ blt+ 112f /* assume user more likely */ lis r5,swapper_pg_dir@ha /* if kernel address, use */ + andi. r0,r9,MSR_PR /* Check usermode */ addi r5,r5,swapper_pg_dir@l /* kernel page table */ - rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */ +#ifdef CONFIG_SMP + bne- .Lhash_page_out /* return if usermode */ +#else + bnelr- +#endif 112: tophys(r5, r5) #ifndef CONFIG_PTE_64BIT rlwimi r5,r4,12,20,29 /* insert top 10 bits of address */ @@ -307,12 +312,15 @@ Hash_msk = (((1 << Hash_bits) - 1) * 64) __REF _GLOBAL(create_hpte) /* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */ + lis r0, TASK_SIZE@h + rlwinm r5,r5,0,~3 /* Clear PP bits */ + cmplw r4,r0 rlwinm r8,r5,32-9,30,30 /* _PAGE_WRITE -> PP msb */ rlwinm r0,r5,32-6,30,30 /* _PAGE_DIRTY -> PP msb */ and r8,r8,r0 /* writable if _RW & _DIRTY */ - rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */ - rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */ - ori r8,r8,0xe04 /* clear out reserved bits */ + bge- 1f /* Kernelspace ? Skip */ + ori r5,r5,3 /* Userspace ? PP = 3 */ +1: ori r8,r8,0xe04 /* clear out reserved bits */ andc r8,r5,r8 /* PP = user? (rw&dirty? 1: 3): 0 */ BEGIN_FTR_SECTION rlwinm r8,r8,0,~_PAGE_COHERENT /* clear M (coherence not required) */ From ceaba662c06598e52cbe4b90fef6b71b7f965cf9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:49 +0200 Subject: [PATCH 34/51] powerpc/ptdump: Display _PAGE_READ and _PAGE_WRITE Instead of always displaying either 'rw' or 'r ' depending on _PAGE_RW, display 'r' or ' ' for _PAGE_READ and 'w' or ' ' for _PAGE_WRITE. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/dd8201a0f8fd87ce62a7ff2edc958b604b8ec3c0.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/ptdump/shared.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c index 5ff101654c45b..39c30c62b7ea7 100644 --- a/arch/powerpc/mm/ptdump/shared.c +++ b/arch/powerpc/mm/ptdump/shared.c @@ -11,10 +11,15 @@ static const struct flag_info flag_array[] = { { - .mask = _PAGE_RW, + .mask = _PAGE_READ, .val = 0, - .set = "r ", - .clear = "rw", + .set = " ", + .clear = "r", + }, { + .mask = _PAGE_WRITE, + .val = 0, + .set = " ", + .clear = "w", }, { .mask = _PAGE_EXEC, .val = _PAGE_EXEC, From 163a72fa89161b57b05d848aedfbd5103fac9dd7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:50 +0200 Subject: [PATCH 35/51] powerpc: Finally remove _PAGE_USER _PAGE_USER is now gone on all targets. Remove it completely. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/76ebe74fdaed4297a1d8203a61174650c1d8d278.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/pgtable.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 84b6a160d2fc3..8804bcc260cb9 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -182,18 +182,14 @@ static inline int pte_young(pte_t pte) } /* - * Don't just check for any non zero bits in __PAGE_USER, since for book3e + * Don't just check for any non zero bits in __PAGE_READ, since for book3e * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in - * _PAGE_USER. Need to explicitly match _PAGE_BAP_UR bit in that case too. + * _PAGE_READ. Need to explicitly match _PAGE_BAP_UR bit in that case too. */ #ifndef pte_read static inline bool pte_read(pte_t pte) { -#ifdef _PAGE_READ return (pte_val(pte) & _PAGE_READ) == _PAGE_READ; -#else - return (pte_val(pte) & _PAGE_USER) == _PAGE_USER; -#endif } #endif @@ -205,7 +201,7 @@ static inline bool pte_read(pte_t pte) static inline bool pte_access_permitted(pte_t pte, bool write) { /* - * A read-only access is controlled by _PAGE_USER bit. + * A read-only access is controlled by _PAGE_READ bit. * We have _PAGE_READ set for WRITE and EXECUTE */ if (!pte_present(pte) || !pte_read(pte)) From b1fba034a6793e9601d581594a781b46c255471f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 25 Sep 2023 20:31:51 +0200 Subject: [PATCH 36/51] powerpc: Support execute-only on all powerpc Introduce PAGE_EXECONLY_X macro which provides exec-only rights. The _X may be seen as redundant with the EXECONLY but it helps keep consistency, all macros having the EXEC right have _X. And put it next to PAGE_NONE as PAGE_EXECONLY_X is somehow PAGE_NONE + EXEC just like all other SOMETHING_X are just SOMETHING + EXEC. On book3s/64 PAGE_EXECONLY becomes PAGE_READONLY_X. On book3s/64, as PAGE_EXECONLY is only valid for Radix add VM_READ flag in vm_get_page_prot() for non-Radix. And update access_error() so that a non exec fault on a VM_EXEC only mapping is always invalid, even when the underlying layer don't always generate a fault for that. For 8xx, set PAGE_EXECONLY_X as _PAGE_NA | _PAGE_EXEC. For others, only set it as just _PAGE_EXEC With that change, 8xx, e500 and 44x fully honor execute-only protection. On 40x that is a partial implementation of execute-only. The implementation won't be complete because once a TLB has been loaded via the Instruction TLB miss handler, it will be possible to read the page. But at least it can't be read unless it is executed first. On 603 MMU, TLB missed are handled by SW and there are separate DTLB and ITLB. Execute-only is therefore now supported by not loading DTLB when read access is not permitted. On hash (604) MMU it is more tricky because hash table is common to load/store and execute. Nevertheless it is still possible to check whether _PAGE_READ is set before loading hash table for a load/store access. At least it can't be read unless it is executed first. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/4283ea9cbef9ff2fbee468904800e1962bc8fc18.1695659959.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/pgtable.h | 2 +- arch/powerpc/include/asm/book3s/64/pgtable.h | 4 +--- arch/powerpc/include/asm/nohash/32/pte-8xx.h | 1 + arch/powerpc/include/asm/nohash/pgtable.h | 2 +- arch/powerpc/include/asm/nohash/pte-e500.h | 1 + arch/powerpc/include/asm/pgtable-masks.h | 2 ++ arch/powerpc/mm/book3s64/pgtable.c | 10 ++++------ arch/powerpc/mm/fault.c | 9 +++++---- arch/powerpc/mm/pgtable.c | 4 ++-- 9 files changed, 18 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 244621c88510e..52971ee30717f 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -425,7 +425,7 @@ static inline bool pte_access_permitted(pte_t pte, bool write) { /* * A read-only access is controlled by _PAGE_READ bit. - * We have _PAGE_READ set for WRITE and EXECUTE + * We have _PAGE_READ set for WRITE */ if (!pte_present(pte) || !pte_read(pte)) return false; diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 0fd12bdc7b5eb..751b01227e364 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -18,6 +18,7 @@ #define _PAGE_WRITE 0x00002 /* write access allowed */ #define _PAGE_READ 0x00004 /* read access allowed */ #define _PAGE_NA _PAGE_PRIVILEGED +#define _PAGE_NAX _PAGE_EXEC #define _PAGE_RO _PAGE_READ #define _PAGE_ROX (_PAGE_READ | _PAGE_EXEC) #define _PAGE_RW (_PAGE_READ | _PAGE_WRITE) @@ -141,9 +142,6 @@ #include -/* Radix only, Hash uses PAGE_READONLY_X + execute-only pkey instead */ -#define PAGE_EXECONLY __pgprot(_PAGE_BASE | _PAGE_EXEC) - /* Permission masks used for kernel mappings */ #define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW) #define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_TOLERANT) diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index 1ee38befd29a2..137dc3c84e456 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -48,6 +48,7 @@ #define _PAGE_HUGE 0x0800 /* Copied to L1 PS bit 29 */ +#define _PAGE_NAX (_PAGE_NA | _PAGE_EXEC) #define _PAGE_ROX (_PAGE_RO | _PAGE_EXEC) #define _PAGE_RW 0 #define _PAGE_RWX _PAGE_EXEC diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 8804bcc260cb9..427db14292c9e 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -202,7 +202,7 @@ static inline bool pte_access_permitted(pte_t pte, bool write) { /* * A read-only access is controlled by _PAGE_READ bit. - * We have _PAGE_READ set for WRITE and EXECUTE + * We have _PAGE_READ set for WRITE */ if (!pte_present(pte) || !pte_read(pte)) return false; diff --git a/arch/powerpc/include/asm/nohash/pte-e500.h b/arch/powerpc/include/asm/nohash/pte-e500.h index 31d2c3ea7df8f..f516f0b5b7a82 100644 --- a/arch/powerpc/include/asm/nohash/pte-e500.h +++ b/arch/powerpc/include/asm/nohash/pte-e500.h @@ -57,6 +57,7 @@ #define _PAGE_KERNEL_ROX (_PAGE_BAP_SR | _PAGE_BAP_SX) #define _PAGE_NA 0 +#define _PAGE_NAX _PAGE_BAP_UX #define _PAGE_RO _PAGE_READ #define _PAGE_ROX (_PAGE_READ | _PAGE_BAP_UX) #define _PAGE_RW (_PAGE_READ | _PAGE_WRITE) diff --git a/arch/powerpc/include/asm/pgtable-masks.h b/arch/powerpc/include/asm/pgtable-masks.h index 808a3b9e8fc05..6e8e2db26a5a8 100644 --- a/arch/powerpc/include/asm/pgtable-masks.h +++ b/arch/powerpc/include/asm/pgtable-masks.h @@ -4,6 +4,7 @@ #ifndef _PAGE_NA #define _PAGE_NA 0 +#define _PAGE_NAX _PAGE_EXEC #define _PAGE_RO _PAGE_READ #define _PAGE_ROX (_PAGE_READ | _PAGE_EXEC) #define _PAGE_RW (_PAGE_READ | _PAGE_WRITE) @@ -20,6 +21,7 @@ /* Permission masks used to generate the __P and __S table */ #define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_NA) +#define PAGE_EXECONLY_X __pgprot(_PAGE_BASE | _PAGE_NAX) #define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_RW) #define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_RWX) #define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_RO) diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 8f8a62d3ff4de..be229290a6a77 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -635,12 +635,10 @@ pgprot_t vm_get_page_prot(unsigned long vm_flags) unsigned long prot; /* Radix supports execute-only, but protection_map maps X -> RX */ - if (radix_enabled() && ((vm_flags & VM_ACCESS_FLAGS) == VM_EXEC)) { - prot = pgprot_val(PAGE_EXECONLY); - } else { - prot = pgprot_val(protection_map[vm_flags & - (VM_ACCESS_FLAGS | VM_SHARED)]); - } + if (!radix_enabled() && ((vm_flags & VM_ACCESS_FLAGS) == VM_EXEC)) + vm_flags |= VM_READ; + + prot = pgprot_val(protection_map[vm_flags & (VM_ACCESS_FLAGS | VM_SHARED)]); if (vm_flags & VM_SAO) prot |= _PAGE_SAO; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index b1723094d464c..9e49ede2bc1c0 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -266,14 +266,15 @@ static bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma } /* - * VM_READ, VM_WRITE and VM_EXEC all imply read permissions, as - * defined in protection_map[]. Read faults can only be caused by - * a PROT_NONE mapping, or with a PROT_EXEC-only mapping on Radix. + * VM_READ, VM_WRITE and VM_EXEC may imply read permissions, as + * defined in protection_map[]. In that case Read faults can only be + * caused by a PROT_NONE mapping. However a non exec access on a + * VM_EXEC only mapping is invalid anyway, so report it as such. */ if (unlikely(!vma_is_accessible(vma))) return true; - if (unlikely(radix_enabled() && ((vma->vm_flags & VM_ACCESS_FLAGS) == VM_EXEC))) + if ((vma->vm_flags & VM_ACCESS_FLAGS) == VM_EXEC) return true; /* diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 781a68c69c2f0..79508c1d15d7f 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -492,7 +492,7 @@ const pgprot_t protection_map[16] = { [VM_READ] = PAGE_READONLY, [VM_WRITE] = PAGE_COPY, [VM_WRITE | VM_READ] = PAGE_COPY, - [VM_EXEC] = PAGE_READONLY_X, + [VM_EXEC] = PAGE_EXECONLY_X, [VM_EXEC | VM_READ] = PAGE_READONLY_X, [VM_EXEC | VM_WRITE] = PAGE_COPY_X, [VM_EXEC | VM_WRITE | VM_READ] = PAGE_COPY_X, @@ -500,7 +500,7 @@ const pgprot_t protection_map[16] = { [VM_SHARED | VM_READ] = PAGE_READONLY, [VM_SHARED | VM_WRITE] = PAGE_SHARED, [VM_SHARED | VM_WRITE | VM_READ] = PAGE_SHARED, - [VM_SHARED | VM_EXEC] = PAGE_READONLY_X, + [VM_SHARED | VM_EXEC] = PAGE_EXECONLY_X, [VM_SHARED | VM_EXEC | VM_READ] = PAGE_READONLY_X, [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_SHARED_X, [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED_X From ff7a60ab1e065257a0e467c13b519f4debcd7fcf Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 11 Oct 2023 16:37:00 +1100 Subject: [PATCH 37/51] powerpc/xive: Fix endian conversion size Sparse reports a size mismatch in the endian swap. The Opal implementation[1] passes the value as a __be64, and the receiving variable out_qsize is a u64, so the use of be32_to_cpu() appears to be an error. [1]: https://github.com/open-power/skiboot/blob/80e2b1dc73/hw/xive.c#L3854 Fixes: 88ec6b93c8e7 ("powerpc/xive: add OPAL extensions for the XIVE native exploitation support") Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20231011053711.93427-2-bgray@linux.ibm.com --- arch/powerpc/sysdev/xive/native.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 9f0af4d795d88..f1c0fa6ece21d 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -802,7 +802,7 @@ int xive_native_get_queue_info(u32 vp_id, u32 prio, if (out_qpage) *out_qpage = be64_to_cpu(qpage); if (out_qsize) - *out_qsize = be32_to_cpu(qsize); + *out_qsize = be64_to_cpu(qsize); if (out_qeoi_page) *out_qeoi_page = be64_to_cpu(qeoi_page); if (out_escalate_irq) From 340a60e3725b9a229eaf03a9b3f8538f22f6ac16 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 11 Oct 2023 16:37:02 +1100 Subject: [PATCH 38/51] powerpc: Explicitly reverse bytes when checking for byte reversal Sparse reports an invalid endian cast here. The code is written for big endian platforms, so le32_to_cpu() acts as a byte reversal. This file is checked by sparse on a little endian build though, so replace the reverse function with the dedicated swab32() function to better express the intent of the code. Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20231011053711.93427-4-bgray@linux.ibm.com --- arch/powerpc/sysdev/mpic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index ba287abcb008b..dabbdd356664c 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -355,7 +355,7 @@ static void __init mpic_test_broken_ipi(struct mpic *mpic) mpic_write(mpic->gregs, MPIC_INFO(GREG_IPI_VECTOR_PRI_0), MPIC_VECPRI_MASK); r = mpic_read(mpic->gregs, MPIC_INFO(GREG_IPI_VECTOR_PRI_0)); - if (r == le32_to_cpu(MPIC_VECPRI_MASK)) { + if (r == swab32(MPIC_VECPRI_MASK)) { printk(KERN_INFO "mpic: Detected reversed IPI registers\n"); mpic->flags |= MPIC_BROKEN_IPI; } From ddfb7d9db843fd4fbdff81fb8a8743f865c3dd96 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 11 Oct 2023 16:37:03 +1100 Subject: [PATCH 39/51] powerpc: Use NULL instead of 0 for null pointers Sparse reports several uses of 0 for pointer arguments and comparisons. Replace with NULL to better convey the intent. Remove entirely if a comparison to follow the kernel style of implicit boolean conversions. Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20231011053711.93427-5-bgray@linux.ibm.com --- arch/powerpc/kernel/setup_64.c | 2 +- arch/powerpc/kvm/book3s_xive_native.c | 2 +- arch/powerpc/net/bpf_jit_comp.c | 4 ++-- arch/powerpc/perf/imc-pmu.c | 2 +- arch/powerpc/platforms/4xx/soc.c | 2 +- arch/powerpc/platforms/pseries/lpar.c | 8 ++++---- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 246201d0d879e..2f19d5e944852 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -364,7 +364,7 @@ void __init early_setup(unsigned long dt_ptr) */ initialise_paca(&boot_paca, 0); fixup_boot_paca(&boot_paca); - WARN_ON(local_paca != 0); + WARN_ON(local_paca); setup_paca(&boot_paca); /* install the paca into registers */ /* -------- printk is now safe to use ------- */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 712ab91ced398..6e2ebbd8aaace 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -567,7 +567,7 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, u8 priority; struct kvm_ppc_xive_eq kvm_eq; int rc; - __be32 *qaddr = 0; + __be32 *qaddr = NULL; struct page *page; struct xive_q *q; gfn_t gfn; diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 37043dfc1addd..471e37cf8e2a4 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -146,9 +146,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) * update ctgtx.idx as it pretends to output instructions, then we can * calculate total size from idx. */ - bpf_jit_build_prologue(0, &cgctx); + bpf_jit_build_prologue(NULL, &cgctx); addrs[fp->len] = cgctx.idx * 4; - bpf_jit_build_epilogue(0, &cgctx); + bpf_jit_build_epilogue(NULL, &cgctx); fixup_len = fp->aux->num_exentries * BPF_FIXUP_LEN * 4; extable_len = fp->aux->num_exentries * sizeof(struct exception_table_entry); diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 9d229ef7f86ef..eba2baabccb03 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -544,7 +544,7 @@ static int nest_imc_event_init(struct perf_event *event) break; } pcni++; - } while (pcni->vbase != 0); + } while (pcni->vbase); if (!flag) return -ENODEV; diff --git a/arch/powerpc/platforms/4xx/soc.c b/arch/powerpc/platforms/4xx/soc.c index b2d940437a662..5412e6b21e106 100644 --- a/arch/powerpc/platforms/4xx/soc.c +++ b/arch/powerpc/platforms/4xx/soc.c @@ -112,7 +112,7 @@ static int __init ppc4xx_l2c_probe(void) } /* Install error handler */ - if (request_irq(irq, l2c_error_handler, 0, "L2C", 0) < 0) { + if (request_irq(irq, l2c_error_handler, 0, "L2C", NULL) < 0) { printk(KERN_ERR "Cannot install L2C error handler" ", cache is not enabled\n"); of_node_put(np); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index f2cb62148f36f..b4e86b7ea584d 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -192,9 +192,9 @@ static void free_dtl_buffers(unsigned long *time_limit) continue; kmem_cache_free(dtl_cache, pp->dispatch_log); pp->dtl_ridx = 0; - pp->dispatch_log = 0; - pp->dispatch_log_end = 0; - pp->dtl_curr = 0; + pp->dispatch_log = NULL; + pp->dispatch_log_end = NULL; + pp->dtl_curr = NULL; if (time_limit && time_after(jiffies, *time_limit)) { cond_resched(); @@ -223,7 +223,7 @@ static void destroy_cpu_associativity(void) { kfree(vcpu_associativity); kfree(pcpu_associativity); - vcpu_associativity = pcpu_associativity = 0; + vcpu_associativity = pcpu_associativity = NULL; } static __be32 *__get_cpu_associativity(int cpu, __be32 *cpu_assoc, int flag) From 419d5d112c2e1e78beda9c3299f71c35141d8dba Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 11 Oct 2023 16:37:04 +1100 Subject: [PATCH 40/51] powerpc: Remove extern from function implementations Sparse reports several function implementations annotated with extern. This is clearly incorrect, likely just copied from an actual extern declaration in another file. Fix the sparse warnings by removing extern. Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20231011053711.93427-6-bgray@linux.ibm.com --- arch/powerpc/kernel/iommu.c | 8 ++++---- arch/powerpc/kernel/traps.c | 4 ++-- arch/powerpc/kvm/book3s_64_vio.c | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 14251bc5219eb..3e28579f7c625 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1074,10 +1074,10 @@ int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa) } EXPORT_SYMBOL_GPL(iommu_tce_check_gpa); -extern long iommu_tce_xchg_no_kill(struct mm_struct *mm, - struct iommu_table *tbl, - unsigned long entry, unsigned long *hpa, - enum dma_data_direction *direction) +long iommu_tce_xchg_no_kill(struct mm_struct *mm, + struct iommu_table *tbl, + unsigned long entry, unsigned long *hpa, + enum dma_data_direction *direction) { long ret; unsigned long size = 0; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index fe3f720c9cd61..5ea2014aff90d 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -157,7 +157,7 @@ static int die_owner = -1; static unsigned int die_nest_count; static int die_counter; -extern void panic_flush_kmsg_start(void) +void panic_flush_kmsg_start(void) { /* * These are mostly taken from kernel/panic.c, but tries to do @@ -170,7 +170,7 @@ extern void panic_flush_kmsg_start(void) bust_spinlocks(1); } -extern void panic_flush_kmsg_end(void) +void panic_flush_kmsg_end(void) { kmsg_dump(KMSG_DUMP_PANIC); bust_spinlocks(0); diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 93b695b289e99..15200d766fc53 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -77,8 +77,8 @@ static void kvm_spapr_tce_liobn_put(struct kref *kref) call_rcu(&stit->rcu, kvm_spapr_tce_iommu_table_free); } -extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, - struct iommu_group *grp) +void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, + struct iommu_group *grp) { int i; struct kvmppc_spapr_tce_table *stt; @@ -105,8 +105,8 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, rcu_read_unlock(); } -extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, - struct iommu_group *grp) +long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, + struct iommu_group *grp) { struct kvmppc_spapr_tce_table *stt = NULL; bool found = false; From 2b4a6cc9a1a7cf6958c8b11f94e61c8e81b60b88 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 11 Oct 2023 16:37:05 +1100 Subject: [PATCH 41/51] powerpc: Annotate endianness of various variables and functions Sparse reports several endianness warnings on variables and functions that are consistently treated as big endian. There are no multi-endianness shenanigans going on here so fix these low hanging fruit up in one patch. All changes are just type annotations; no endianness switching operations are introduced by this patch. Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20231011053711.93427-7-bgray@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/pgtable.h | 2 +- arch/powerpc/include/asm/imc-pmu.h | 16 ++++++++-------- arch/powerpc/kernel/prom_init.c | 2 +- arch/powerpc/kexec/core_64.c | 4 ++-- arch/powerpc/kexec/file_load_64.c | 6 +++--- arch/powerpc/mm/drmem.c | 2 +- arch/powerpc/perf/hv-24x7.c | 2 +- arch/powerpc/perf/imc-pmu.c | 9 +++++---- arch/powerpc/platforms/powermac/feature.c | 3 ++- arch/powerpc/platforms/pseries/hotplug-memory.c | 3 ++- 10 files changed, 26 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 751b01227e364..cb77eddca54b9 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -626,7 +626,7 @@ static inline pte_t pte_mkdevmap(pte_t pte) */ static inline int pte_devmap(pte_t pte) { - u64 mask = cpu_to_be64(_PAGE_DEVMAP | _PAGE_PTE); + __be64 mask = cpu_to_be64(_PAGE_DEVMAP | _PAGE_PTE); return (pte_raw(pte) & mask) == mask; } diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h index 699a88584ae16..a656635df3861 100644 --- a/arch/powerpc/include/asm/imc-pmu.h +++ b/arch/powerpc/include/asm/imc-pmu.h @@ -74,14 +74,14 @@ struct imc_events { * The following is the data structure to hold trace imc data. */ struct trace_imc_data { - u64 tb1; - u64 ip; - u64 val; - u64 cpmc1; - u64 cpmc2; - u64 cpmc3; - u64 cpmc4; - u64 tb2; + __be64 tb1; + __be64 ip; + __be64 val; + __be64 cpmc1; + __be64 cpmc2; + __be64 cpmc3; + __be64 cpmc4; + __be64 tb2; }; /* Event attribute array index */ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index d464ba412084d..e67effdba85cc 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -947,7 +947,7 @@ struct option_vector7 { } __packed; struct ibm_arch_vec { - struct { u32 mask, val; } pvrs[14]; + struct { __be32 mask, val; } pvrs[14]; u8 num_vectors; diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index a79e28c91e2be..0bee7ca9a77c6 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -379,8 +379,8 @@ void default_machine_kexec(struct kimage *image) #ifdef CONFIG_PPC_64S_HASH_MMU /* Values we need to export to the second kernel via the device tree. */ -static unsigned long htab_base; -static unsigned long htab_size; +static __be64 htab_base; +static __be64 htab_size; static struct property htab_base_prop = { .name = "linux,htab-base", diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index 09187aca3d1f8..961a6dd673656 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -32,7 +32,7 @@ #include struct umem_info { - u64 *buf; /* data buffer for usable-memory property */ + __be64 *buf; /* data buffer for usable-memory property */ u32 size; /* size allocated for the data buffer */ u32 max_entries; /* maximum no. of entries */ u32 idx; /* index of current entry */ @@ -443,10 +443,10 @@ static int locate_mem_hole_bottom_up_ppc64(struct kexec_buf *kbuf, * * Returns buffer on success, NULL on error. */ -static u64 *check_realloc_usable_mem(struct umem_info *um_info, int cnt) +static __be64 *check_realloc_usable_mem(struct umem_info *um_info, int cnt) { u32 new_size; - u64 *tbuf; + __be64 *tbuf; if ((um_info->idx + cnt) <= um_info->max_entries) return um_info->buf; diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index 2369d1bf2411e..fde7790277f75 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -67,7 +67,7 @@ static int drmem_update_dt_v1(struct device_node *memory, struct property *new_prop; struct of_drconf_cell_v1 *dr_cell; struct drmem_lmb *lmb; - u32 *p; + __be32 *p; new_prop = clone_property(prop, prop->length); if (!new_prop) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 3449be7c0d51f..057ec2e3451d1 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -1338,7 +1338,7 @@ static int get_count_from_result(struct perf_event *event, for (i = count = 0, element_data = res->elements + data_offset; i < num_elements; i++, element_data += data_size + data_offset) - count += be64_to_cpu(*((u64 *) element_data)); + count += be64_to_cpu(*((__be64 *)element_data)); *countp = count; diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index eba2baabccb03..2016aee270379 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1025,16 +1025,16 @@ static bool is_thread_imc_pmu(struct perf_event *event) return false; } -static u64 * get_event_base_addr(struct perf_event *event) +static __be64 *get_event_base_addr(struct perf_event *event) { u64 addr; if (is_thread_imc_pmu(event)) { addr = (u64)per_cpu(thread_imc_mem, smp_processor_id()); - return (u64 *)(addr + (event->attr.config & IMC_EVENT_OFFSET_MASK)); + return (__be64 *)(addr + (event->attr.config & IMC_EVENT_OFFSET_MASK)); } - return (u64 *)event->hw.event_base; + return (__be64 *)event->hw.event_base; } static void thread_imc_pmu_start_txn(struct pmu *pmu, @@ -1058,7 +1058,8 @@ static int thread_imc_pmu_commit_txn(struct pmu *pmu) static u64 imc_read_counter(struct perf_event *event) { - u64 *addr, data; + __be64 *addr; + u64 data; /* * In-Memory Collection (IMC) counters are free flowing counters. diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index ae62d432db8bb..81c9fbae88b14 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -2614,7 +2614,8 @@ static void __init probe_one_macio(const char *name, const char *compat, int typ struct device_node* node; int i; volatile u32 __iomem *base; - const u32 *addrp, *revp; + const __be32 *addrp; + const u32 *revp; phys_addr_t addr; u64 size; diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index aa4042dcd6d40..a43bfb01720ae 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -55,7 +55,8 @@ static bool find_aa_index(struct device_node *dr_node, struct property *ala_prop, const u32 *lmb_assoc, u32 *aa_index) { - u32 *assoc_arrays, new_prop_size; + __be32 *assoc_arrays; + u32 new_prop_size; struct property *new_prop; int aa_arrays, aa_array_entries, aa_array_sz; int i, index; From 8577dd00a6ba335bc359313599d6100522a1931c Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 11 Oct 2023 16:37:07 +1100 Subject: [PATCH 42/51] powerpc/opal: Annotate out param endianness Sparse reports an endian mismatch with args to opal_int_get_xirr(). Checking the skiboot source[1] shows the function takes a __be32* (as expected), so update the function declaration to reflect this. [1]: https://github.com/open-power/skiboot/blob/80e2b1dc73/hw/xive.c#L3479 Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20231011053711.93427-9-bgray@linux.ibm.com --- arch/powerpc/include/asm/opal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index a9b31cc258fcb..b66b0c615f4f1 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -227,7 +227,7 @@ int64_t opal_pci_set_power_state(uint64_t async_token, uint64_t id, uint64_t data); int64_t opal_pci_poll2(uint64_t id, uint64_t data); -int64_t opal_int_get_xirr(uint32_t *out_xirr, bool just_poll); +int64_t opal_int_get_xirr(__be32 *out_xirr, bool just_poll); int64_t opal_int_set_cppr(uint8_t cppr); int64_t opal_int_eoi(uint32_t xirr); int64_t opal_int_set_mfrr(uint32_t cpu, uint8_t mfrr); From c6519c6df0722e432f330afbc7c00d16d5be5c58 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 11 Oct 2023 16:37:08 +1100 Subject: [PATCH 43/51] powerpc/uaccess: Cast away __user annotation after verification Sparse reports dereference of a __user pointer. copy_mc_to_user() takes a __user pointer, verifies it, then calls the generic copy routine copy_mc_generic(). As we have verified the pointer, cast out the __user annotation when passing to copy_mc_generic(). Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20231011053711.93427-10-bgray@linux.ibm.com --- arch/powerpc/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index fb725ec77926e..f1f9890f50d3e 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -374,7 +374,7 @@ copy_mc_to_user(void __user *to, const void *from, unsigned long n) if (check_copy_size(from, n, true)) { if (access_ok(to, n)) { allow_write_to_user(to, n); - n = copy_mc_generic((void *)to, from, n); + n = copy_mc_generic((void __force *)to, from, n); prevent_write_to_user(to, n); } } From 2c4ce3e65b1a543123ffcec4b021ad6ebd4e4e4e Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 11 Oct 2023 16:37:09 +1100 Subject: [PATCH 44/51] powerpc: Cast away __iomem in low level IO routines Sparse reports dereferencing an __iomem pointer. These routines are clearly low level handlers for IO memory, so force cast away the __iomem annotation to tell sparse the dereferences are safe. Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20231011053711.93427-11-bgray@linux.ibm.com --- arch/powerpc/kernel/io.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c index 2f29b7d432de3..6af535905984a 100644 --- a/arch/powerpc/kernel/io.c +++ b/arch/powerpc/kernel/io.c @@ -33,7 +33,7 @@ void _insb(const volatile u8 __iomem *port, void *buf, long count) return; asm volatile("sync"); do { - tmp = *port; + tmp = *(const volatile u8 __force *)port; eieio(); *tbuf++ = tmp; } while (--count != 0); @@ -49,7 +49,7 @@ void _outsb(volatile u8 __iomem *port, const void *buf, long count) return; asm volatile("sync"); do { - *port = *tbuf++; + *(volatile u8 __force *)port = *tbuf++; } while (--count != 0); asm volatile("sync"); } @@ -64,7 +64,7 @@ void _insw_ns(const volatile u16 __iomem *port, void *buf, long count) return; asm volatile("sync"); do { - tmp = *port; + tmp = *(const volatile u16 __force *)port; eieio(); *tbuf++ = tmp; } while (--count != 0); @@ -80,7 +80,7 @@ void _outsw_ns(volatile u16 __iomem *port, const void *buf, long count) return; asm volatile("sync"); do { - *port = *tbuf++; + *(volatile u16 __force *)port = *tbuf++; } while (--count != 0); asm volatile("sync"); } @@ -95,7 +95,7 @@ void _insl_ns(const volatile u32 __iomem *port, void *buf, long count) return; asm volatile("sync"); do { - tmp = *port; + tmp = *(const volatile u32 __force *)port; eieio(); *tbuf++ = tmp; } while (--count != 0); @@ -111,7 +111,7 @@ void _outsl_ns(volatile u32 __iomem *port, const void *buf, long count) return; asm volatile("sync"); do { - *port = *tbuf++; + *(volatile u32 __force *)port = *tbuf++; } while (--count != 0); asm volatile("sync"); } From 82f635243f209a85d3deb9f64439c3ea84cd4ecb Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 11 Oct 2023 16:37:10 +1100 Subject: [PATCH 45/51] powerpc/eeh: Remove unnecessary cast Sparse reports a warning when casting to an int. There is no need to cast in the first place, so drop them. Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20231011053711.93427-12-bgray@linux.ibm.com --- arch/powerpc/kernel/eeh_driver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 438568a472d03..48773d2d9be3c 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -39,7 +39,7 @@ static int eeh_result_priority(enum pci_ers_result result) case PCI_ERS_RESULT_NEED_RESET: return 6; default: - WARN_ONCE(1, "Unknown pci_ers_result value: %d\n", (int)result); + WARN_ONCE(1, "Unknown pci_ers_result value: %d\n", result); return 0; } }; @@ -60,7 +60,7 @@ static const char *pci_ers_result_name(enum pci_ers_result result) case PCI_ERS_RESULT_NO_AER_DRIVER: return "no AER driver"; default: - WARN_ONCE(1, "Unknown result type: %d\n", (int)result); + WARN_ONCE(1, "Unknown result type: %d\n", result); return "unknown"; } }; From b574b817cc7bfcc87bbc92b4b19525442401ae5e Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Wed, 11 Oct 2023 16:37:11 +1100 Subject: [PATCH 46/51] powerpc/fadump: Annotate endianness cast with __force Sparse reports an endianness error with the else case of val = (cpu_endian ? be64_to_cpu(reg_entry->reg_val) : (u64)(reg_entry->reg_val)); This is a safe operation because the code is explicitly working with dynamic endianness, so add the __force annotation to tell Sparse to ignore it. Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20231011053711.93427-13-bgray@linux.ibm.com --- arch/powerpc/platforms/powernv/opal-fadump.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/opal-fadump.h b/arch/powerpc/platforms/powernv/opal-fadump.h index 3f715efb0aa6e..5eeb794b5eb1e 100644 --- a/arch/powerpc/platforms/powernv/opal-fadump.h +++ b/arch/powerpc/platforms/powernv/opal-fadump.h @@ -135,7 +135,7 @@ static inline void opal_fadump_read_regs(char *bufp, unsigned int regs_cnt, for (i = 0; i < regs_cnt; i++, bufp += reg_entry_size) { reg_entry = (struct hdat_fadump_reg_entry *)bufp; val = (cpu_endian ? be64_to_cpu(reg_entry->reg_val) : - (u64)(reg_entry->reg_val)); + (u64 __force)(reg_entry->reg_val)); opal_fadump_set_regval_regnum(regs, be32_to_cpu(reg_entry->reg_type), be32_to_cpu(reg_entry->reg_num), From 6db51ff905362e6c79593dbda08f61f24b25971c Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 5 Jan 2023 14:41:45 +0800 Subject: [PATCH 47/51] macintosh/macio-adb: add missing iounmap() on error in macio_init() Add missing iounmap(), if request_irq() fails. Signed-off-by: Yang Yingliang Signed-off-by: Michael Ellerman Link: https://msgid.link/20230105064145.3879356-1-yangyingliang@huawei.com --- drivers/macintosh/macio-adb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/macintosh/macio-adb.c b/drivers/macintosh/macio-adb.c index 55a9f8c3a150e..779f1268286eb 100644 --- a/drivers/macintosh/macio-adb.c +++ b/drivers/macintosh/macio-adb.c @@ -123,6 +123,7 @@ int macio_init(void) irq = irq_of_parse_and_map(adbs, 0); of_node_put(adbs); if (request_irq(irq, macio_adb_interrupt, 0, "ADB", (void *)0)) { + iounmap(adb); printk(KERN_ERR "ADB: can't get irq %d\n", irq); return -EAGAIN; } From b28d1ccf921a4333be14017d82066386d419e638 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsburskii Date: Sat, 15 Apr 2023 04:17:53 -0700 Subject: [PATCH 48/51] powerpc/io: Expect immutable pointer in virt_to_phys() prototype MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit virt_to_phys() doesn't need the address pointer to be mutable. At the same time allowing it to be mutable leads to the following build warning for constant pointers: warning: passing argument 1 of ‘virt_to_phys’ discards ‘const’ qualifier from pointer target type Signed-off-by: Stanislav Kinsburskii Reviewed-by: Linus Walleij Signed-off-by: Michael Ellerman Link: https://msgid.link/168155747391.13678.10634415747614468991.stgit@skinsburskii.localdomain --- arch/powerpc/include/asm/io.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 0732b743e0996..5220274a62772 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -950,7 +950,7 @@ extern void __iomem *__ioremap_caller(phys_addr_t, unsigned long size, * almost all conceivable cases a device driver should not be using * this function */ -static inline unsigned long virt_to_phys(volatile void * address) +static inline unsigned long virt_to_phys(const volatile void * address) { WARN_ON(IS_ENABLED(CONFIG_DEBUG_VIRTUAL) && !virt_addr_valid(address)); From d45c4b48dafb5820e5cc267ff9a6d7784d13a43c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 24 Aug 2023 16:42:10 +1000 Subject: [PATCH 49/51] powerpc: Hide empty pt_regs at base of the stack A thread started via eg. user_mode_thread() runs in the kernel to begin with and then may later return to userspace. While it's running in the kernel it has a pt_regs at the base of its kernel stack, but that pt_regs is all zeroes. If the thread oopses in that state, it leads to an ugly stack trace with a big block of zero GPRs, as reported by Joel: Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0) CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.5.0-rc7-00004-gf7757129e3de-dirty #3 Hardware name: IBM PowerNV (emulated by qemu) POWER9 0x4e1200 opal:v7.0 PowerNV Call Trace: [c0000000036afb00] [c0000000010dd058] dump_stack_lvl+0x6c/0x9c (unreliable) [c0000000036afb30] [c00000000013c524] panic+0x178/0x424 [c0000000036afbd0] [c000000002005100] mount_root_generic+0x250/0x324 [c0000000036afca0] [c0000000020057d0] prepare_namespace+0x2d4/0x344 [c0000000036afd20] [c0000000020049c0] kernel_init_freeable+0x358/0x3ac [c0000000036afdf0] [c0000000000111b0] kernel_init+0x30/0x1a0 [c0000000036afe50] [c00000000000debc] ret_from_kernel_user_thread+0x14/0x1c --- interrupt: 0 at 0x0 NIP: 0000000000000000 LR: 0000000000000000 CTR: 0000000000000000 REGS: c0000000036afe80 TRAP: 0000 Not tainted (6.5.0-rc7-00004-gf7757129e3de-dirty) MSR: 0000000000000000 <> CR: 00000000 XER: 00000000 CFAR: 0000000000000000 IRQMASK: 0 GPR00: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR04: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR08: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR12: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR24: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR28: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 NIP [0000000000000000] 0x0 LR [0000000000000000] 0x0 --- interrupt: 0 The all-zero pt_regs looks ugly and conveys no useful information, other than its presence. So detect that case and just show the presence of the frame by printing the interrupt marker, eg: Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0) CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.5.0-rc3-00126-g18e9506562a0-dirty #301 Hardware name: IBM pSeries (emulated by qemu) POWER9 (raw) 0x4e1202 0xf000005 of:SLOF,HEAD hv:linux,kvm pSeries Call Trace: [c000000003aabb00] [c000000001143db8] dump_stack_lvl+0x6c/0x9c (unreliable) [c000000003aabb30] [c00000000014c624] panic+0x178/0x424 [c000000003aabbd0] [c0000000020050fc] mount_root_generic+0x250/0x324 [c000000003aabca0] [c0000000020057cc] prepare_namespace+0x2d4/0x344 [c000000003aabd20] [c0000000020049bc] kernel_init_freeable+0x358/0x3ac [c000000003aabdf0] [c0000000000111b0] kernel_init+0x30/0x1a0 [c000000003aabe50] [c00000000000debc] ret_from_kernel_user_thread+0x14/0x1c --- interrupt: 0 at 0x0 To avoid ever suppressing a valid pt_regs make sure the pt_regs has a zero MSR and TRAP value, and is located at the very base of the stack. Fixes: 6895dfc04741 ("powerpc: copy_thread fill in interrupt frame marker and back chain") Reported-by: Joel Stanley Reported-by: Nicholas Piggin Reviewed-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://msgid.link/20230824064210.907266-1-mpe@ellerman.id.au --- arch/powerpc/kernel/process.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index b68898ac07e19..392404688cec3 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -2258,6 +2258,22 @@ unsigned long __get_wchan(struct task_struct *p) return ret; } +static bool empty_user_regs(struct pt_regs *regs, struct task_struct *tsk) +{ + unsigned long stack_page; + + // A non-empty pt_regs should never have a zero MSR or TRAP value. + if (regs->msr || regs->trap) + return false; + + // Check it sits at the very base of the stack + stack_page = (unsigned long)task_stack_page(tsk); + if ((unsigned long)(regs + 1) != stack_page + THREAD_SIZE) + return false; + + return true; +} + static int kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH; void __no_sanitize_address show_stack(struct task_struct *tsk, @@ -2322,9 +2338,13 @@ void __no_sanitize_address show_stack(struct task_struct *tsk, lr = regs->link; printk("%s--- interrupt: %lx at %pS\n", loglvl, regs->trap, (void *)regs->nip); - __show_regs(regs); - printk("%s--- interrupt: %lx\n", - loglvl, regs->trap); + + // Detect the case of an empty pt_regs at the very base + // of the stack and suppress showing it in full. + if (!empty_user_regs(regs, tsk)) { + __show_regs(regs); + printk("%s--- interrupt: %lx\n", loglvl, regs->trap); + } firstframe = 1; } From e08c43e6c3eb5d805b61d981f1e8286ee0dc6d1a Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sat, 14 Oct 2023 01:57:14 +0800 Subject: [PATCH 50/51] powerpc/perf: Optimize find_alternatives_list() using binary search This patch improves the performance of event alternative lookup by replacing the previous linear search with a more efficient binary search. This change reduces the time complexity for the search process from O(n) to O(log(n)). A pre-sorted table of event values and their corresponding indices has been introduced to expedite the search process. Signed-off-by: Kuan-Wei Chiu [mpe: Call the array "presort*ed*_event_table", minor formatting] Signed-off-by: Michael Ellerman Link: https://msgid.link/20231013175714.2142775-1-visitorckw@gmail.com --- arch/powerpc/perf/power6-pmu.c | 46 +++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c index 5729b6e059de0..9f720b522e171 100644 --- a/arch/powerpc/perf/power6-pmu.c +++ b/arch/powerpc/perf/power6-pmu.c @@ -335,26 +335,38 @@ static const unsigned int event_alternatives[][MAX_ALT] = { { 0x3000fe, 0x400056 }, /* PM_DATA_FROM_L3MISS */ }; -/* - * This could be made more efficient with a binary search on - * a presorted list, if necessary - */ static int find_alternatives_list(u64 event) { - int i, j; - unsigned int alt; - - for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { - if (event < event_alternatives[i][0]) - return -1; - for (j = 0; j < MAX_ALT; ++j) { - alt = event_alternatives[i][j]; - if (!alt || event < alt) - break; - if (event == alt) - return i; - } + const unsigned int presorted_event_table[] = { + 0x0130e8, 0x080080, 0x080088, 0x10000a, 0x10000b, 0x10000d, 0x10000e, + 0x100010, 0x10001a, 0x100026, 0x100054, 0x100056, 0x1000f0, 0x1000f8, + 0x1000fc, 0x200008, 0x20000e, 0x200010, 0x200012, 0x200054, 0x2000f0, + 0x2000f2, 0x2000f4, 0x2000f5, 0x2000f6, 0x2000f8, 0x2000fc, 0x2000fe, + 0x2d0030, 0x30000a, 0x30000c, 0x300010, 0x300012, 0x30001a, 0x300056, + 0x3000f0, 0x3000f2, 0x3000f6, 0x3000f8, 0x3000fc, 0x3000fe, 0x400006, + 0x400007, 0x40000a, 0x40000e, 0x400010, 0x400018, 0x400056, 0x4000f0, + 0x4000f8, 0x600005 + }; + const unsigned int event_index_table[] = { + 0, 1, 2, 3, 4, 1, 5, 6, 7, 8, 9, 10, 11, 12, 13, 12, 14, + 7, 15, 2, 9, 16, 3, 4, 0, 17, 10, 18, 19, 20, 1, 17, 15, 19, + 18, 2, 16, 21, 8, 0, 22, 13, 14, 11, 21, 5, 20, 22, 1, 6, 3 + }; + int hi = ARRAY_SIZE(presorted_event_table) - 1; + int lo = 0; + + while (lo <= hi) { + int mid = lo + (hi - lo) / 2; + unsigned int alt = presorted_event_table[mid]; + + if (alt < event) + lo = mid + 1; + else if (alt > event) + hi = mid - 1; + else + return event_index_table[mid]; } + return -1; } From efce8422dd53fae6cdbd37741034ac4eb4730819 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 19 Oct 2023 14:44:52 +0530 Subject: [PATCH 51/51] powerpc/paravirt: Improve vcpu_is_preempted PowerVM Hypervisor dispatches on a whole core basis. In a shared LPAR, a CPU from a core that is CEDED or preempted may have a larger latency. In such a scenario, its preferable to choose a different CPU to run. If one of the CPUs in the core is active, i.e neither CEDED nor preempted, then consider this CPU as not preempted. Also if any of the CPUs in the core has yielded but OS has not requested CEDE or CONFER, then consider this CPU to be preempted. Correct detection of preempted CPUs is important for detecting idle CPUs/cores in task scheduler. Tested-by: Aboorva Devarajan Reviewed-by: Shrikanth Hegde Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://msgid.link/20231019091452.95260-1-srikar@linux.vnet.ibm.com --- arch/powerpc/include/asm/paravirt.h | 47 +++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h index e08513d731193..ac4279208d633 100644 --- a/arch/powerpc/include/asm/paravirt.h +++ b/arch/powerpc/include/asm/paravirt.h @@ -71,6 +71,11 @@ static inline void yield_to_any(void) { plpar_hcall_norets_notrace(H_CONFER, -1, 0); } + +static inline bool is_vcpu_idle(int vcpu) +{ + return lppaca_of(vcpu).idle; +} #else static inline bool is_shared_processor(void) { @@ -100,6 +105,10 @@ static inline void prod_cpu(int cpu) ___bad_prod_cpu(); /* This would be a bug */ } +static inline bool is_vcpu_idle(int vcpu) +{ + return false; +} #endif #define vcpu_is_preempted vcpu_is_preempted @@ -121,9 +130,23 @@ static inline bool vcpu_is_preempted(int cpu) if (!is_shared_processor()) return false; + /* + * If the hypervisor has dispatched the target CPU on a physical + * processor, then the target CPU is definitely not preempted. + */ + if (!(yield_count_of(cpu) & 1)) + return false; + + /* + * If the target CPU has yielded to Hypervisor but OS has not + * requested idle then the target CPU is definitely preempted. + */ + if (!is_vcpu_idle(cpu)) + return true; + #ifdef CONFIG_PPC_SPLPAR if (!is_kvm_guest()) { - int first_cpu; + int first_cpu, i; /* * The result of vcpu_is_preempted() is used in a @@ -149,11 +172,29 @@ static inline bool vcpu_is_preempted(int cpu) */ if (cpu_first_thread_sibling(cpu) == first_cpu) return false; + + /* + * If any of the threads of the target CPU's core are not + * preempted or ceded, then consider target CPU to be + * non-preempted. + */ + first_cpu = cpu_first_thread_sibling(cpu); + for (i = first_cpu; i < first_cpu + threads_per_core; i++) { + if (i == cpu) + continue; + if (!(yield_count_of(i) & 1)) + return false; + if (!is_vcpu_idle(i)) + return true; + } } #endif - if (yield_count_of(cpu) & 1) - return true; + /* + * None of the threads in target CPU's core are running but none of + * them were preempted too. Hence assume the target CPU to be + * non-preempted. + */ return false; }