File _service:tar_scm:0005-haoc-kernel.patch of Package kernel
Project: home:laokz:branches:openEuler:24.03:SP1:Everything
From 099672494b5fbd626031f313bbdbc42d1eb3f02d Mon Sep 17 00:00:00 2001
From: liuzh <liuzhh@zgclab.edu.cn>
Date: Wed, 22 May 2024 16:36:55 +0800
Subject: [PATCH] Squashed commit of the following:

commit 909ad06b3bf629d9af4e143347c1d1ef8a3a5808
Author: liuzh <liuzhh@zgclab.edu.cn>
Date:   Wed May 22 16:23:13 2024 +0800

    fix mte_sync_tags() parameters after rebase to 6.6.0-27.0.0.

commit 1cbf51371b0539a45f816419b2da82cf36162b4a
Author: ljl <ljl2013@163.com>
Date:   Mon Mar 25 08:01:32 2024 +0000

    IEE SI: Removed redundant codes.

commit 0178bfc79ad1769a36f4165348a671d2182cff55
Author: zhangsy <zhangshiyang17@mails.ucas.ac.cn>
Date:   Mon Mar 25 11:01:11 2024 +0800

    Fix bugs on qemu when opening CONFIG_CREDP.

commit 8e714f6e8f2ace5a6fc900b4bce6b03c83c41870
Author: ljl <ljl2013@163.com>
Date:   Thu Mar 21 04:44:26 2024 +0000

    IEE SI: Remove PAN operations as BTLB BUG is already fixed.

commit 7b5fc74cb99e377d3bc59da81612cd6f3dd8a4d8
Author: ljl <lvjinglin2013@163.com>
Date:   Wed Mar 20 18:31:47 2024 +0800

    IEE SI: Migration of iee rwx gate.

commit aad2c7e89c9c4ad8ff0fb3ee53cd1b974144a283
Author: liuzh <liuzhh@zgclab.edu.cn>
Date:   Mon Mar 18 15:32:43 2024 +0800

    modify slub.c set_track_prepare()

commit 7452bac06ec09bf8321dfdbfb8b6a429d2cd8637
Author: zhangsy <zhangshiyang17@mails.ucas.ac.cn>
Date:   Thu Mar 21 11:26:19 2024 +0800

    Set pgd of lm Privileged.

commit 33934cfc3eed798a3a687bf86c6bd92697e68ba9
Author: zhangsy <zhangshiyang17@mails.ucas.ac.cn>
Date:   Tue Mar 19 17:14:32 2024 +0800

    Delete some redundant code and put trans_pgd into IEE.

commit 2bfe9008a72f8b8ac237bc7a5f99f9d40e84c247
Author: zhangshiyang17@mails.ucas.ac.cn <zhangshiyang17@mails.ucas.ac.cn>
Date:   Mon Mar 18 11:47:50 2024 +0000

    Fix bugs on physical when opening CONFIG_IEE and CONFIG_PTP.

commit dafa2df600757511ce3e8f178e05e28adabdf39b
Author: zhangsy <zhangshiyang17@mails.ucas.ac.cn>
Date:   Mon Mar 18 10:40:42 2024 +0800

    Fix bugs on qemu when opening CONFIG_IEE and CONFIG_PTP.

commit 9231a9f6b34c62090b5f202c9c64a52bfdac7a73
Author: zhangsy <zhangshiyang17@mails.ucas.ac.cn>
Date:   Thu Mar 14 16:34:53 2024 +0800

    Fix compiling bugs of CONFIG_PTP.

commit 6469df3bcce32896c2cb297d3cd7ead82c33f35d
Author: zhangsy <zhangshiyang17@mails.ucas.ac.cn>
Date:   Thu Mar 14 11:10:00 2024 +0800

    Fix bugs on qemu when opening CONFIG_IEE and CONFIG_INTERRUPTABLE.

commit 5f1773dada622a3514c9ed6aa72dd50e918f2664
Author: zhangsy <zhangshiyang17@mails.ucas.ac.cn>
Date:   Wed Mar 13 17:31:39 2024 +0800

    Fix bugs on qemu when opening CONFIG_IEE.

commit 73f433a093fa84cffa5e11e86bed6f17c9b30a39
Author: liuzh <liuzhh@zgclab.edu.cn>
Date:   Tue Mar 12 15:32:29 2024 +0800

    fix the map of IEE_SI_TEXT.

commit 9b92deb4b2338093d9b04f4b81f162855b31c983
Author: liuzh <liuzhh@zgclab.edu.cn>
Date:   Sun Mar 10 16:11:13 2024 +0800

    modified to be able to compile. can start the kernel with qemu and
    successfully reach `start_kernel()`.

commit e892ec4790d72e9433b48b0221e7e6dc4c361dd9
Author: liuzh <liuzhh@zgclab.edu.cn>
Date:   Thu Mar 7 14:27:45 2024 +0800

    fix some conflicts

commit fdec7e39345e81e867e01258487f88801b790b02
Author: liuzh <liuzhh@zgclab.edu.cn>
Date:   Wed Mar 6 12:31:11 2024 +0800

    migrate openeuler-commit code.
    (need some fix before compiling)
---
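[Editor's note, added in the format-patch notes area; not part of the upstream patch. "HAOC" is the hardened-kernel feature named by this file. The series introduces four related switches: CONFIG_IEE (an isolated execution environment for arm64), CONFIG_CREDP (credential isolation), CONFIG_PTP (page-table protection, used throughout pgtable.h), and CONFIG_KOI (driver/kernel-module isolation). The recurring pattern below is that direct writes to protected objects are replaced by calls into a variadic gate keyed by an operation ID from asm/iee-def.h. A minimal standalone sketch of that call shape, with a toy dispatcher standing in for the real gate in arch/arm64/kernel/iee/iee.c:

#include <stdarg.h>
#include <stdio.h>

#define IEE_OP_SET_CRED_UID 11 /* ID matches asm/iee-def.h */

/* Toy stand-in: the real iee_rw_gate() switches into the isolated
 * environment, validates the request, then performs the write. */
static unsigned long long iee_rw_gate(int flag, ...)
{
    va_list ap;
    va_start(ap, flag);
    if (flag == IEE_OP_SET_CRED_UID) {
        unsigned int *field = va_arg(ap, unsigned int *);
        unsigned int val = va_arg(ap, unsigned int);
        *field = val; /* the privileged store happens here */
    }
    va_end(ap);
    return 0;
}

int main(void)
{
    unsigned int uid = 0;
    iee_rw_gate(IEE_OP_SET_CRED_UID, &uid, 1000u);
    printf("uid=%u\n", uid);
    return 0;
}
End of editor's note.]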
 Makefile | 3 +-
 arch/arm64/Kconfig | 18 +
 arch/arm64/include/asm/assembler.h | 67 +
 arch/arm64/include/asm/daifflags.h | 16 +
 arch/arm64/include/asm/efi.h | 4 +
 arch/arm64/include/asm/fixmap.h | 3 +
 arch/arm64/include/asm/hw_breakpoint.h | 12 +
 arch/arm64/include/asm/iee-access.h | 36 +
 arch/arm64/include/asm/iee-cred.h | 150 ++
 arch/arm64/include/asm/iee-def.h | 74 +
 arch/arm64/include/asm/iee-si.h | 64 +
 arch/arm64/include/asm/iee-slab.h | 23 +
 arch/arm64/include/asm/iee-token.h | 40 +
 arch/arm64/include/asm/iee.h | 10 +
 arch/arm64/include/asm/kernel-pgtable.h | 21 +
 arch/arm64/include/asm/koi.h | 335 +++++
 arch/arm64/include/asm/memory.h | 24 +
 arch/arm64/include/asm/mmu_context.h | 20 +
 arch/arm64/include/asm/pgalloc.h | 4 +
 arch/arm64/include/asm/pgtable-hwdef.h | 11 +
 arch/arm64/include/asm/pgtable.h | 304 +++-
 arch/arm64/include/asm/pointer_auth.h | 5 +
 arch/arm64/include/asm/sysreg.h | 58 +
 arch/arm64/include/asm/tlb.h | 9 +
 arch/arm64/include/asm/tlbflush.h | 58 +-
 arch/arm64/kernel/Makefile | 2 +
 arch/arm64/kernel/armv8_deprecated.c | 16 +
 arch/arm64/kernel/asm-offsets.c | 11 +
 arch/arm64/kernel/cpu_errata.c | 12 +
 arch/arm64/kernel/cpufeature.c | 79 +
 arch/arm64/kernel/debug-monitors.c | 4 +
 arch/arm64/kernel/entry-common.c | 4 +
 arch/arm64/kernel/entry.S | 611 ++++++++
 arch/arm64/kernel/fpsimd.c | 4 +
 arch/arm64/kernel/head.S | 56 +
 arch/arm64/kernel/hibernate.c | 14 +
 arch/arm64/kernel/hw_breakpoint.c | 99 ++
 arch/arm64/kernel/iee/Makefile | 1 +
 arch/arm64/kernel/iee/iee-func.c | 187 +++
 arch/arm64/kernel/iee/iee-gate.S | 174 +++
 arch/arm64/kernel/iee/iee.c | 1360 +++++++++++++++++
 arch/arm64/kernel/koi/Makefile | 1 +
 arch/arm64/kernel/koi/koi.c | 1327 +++++++++++++++++
 arch/arm64/kernel/mte.c | 5 +
 arch/arm64/kernel/process.c | 19 +-
 arch/arm64/kernel/proton-pack.c | 8 +
 arch/arm64/kernel/setup.c | 33 +
 arch/arm64/kernel/traps.c | 26 +
 arch/arm64/kernel/vmlinux.lds.S | 61 +
 arch/arm64/mm/context.c | 91 +-
 arch/arm64/mm/fault.c | 9 +
 arch/arm64/mm/fixmap.c | 74 +-
 arch/arm64/mm/init.c | 34 +
 arch/arm64/mm/mmu.c | 1780 +++++++++++++++++++----
 arch/arm64/mm/pgd.c | 39 +
 arch/arm64/mm/proc.S | 28 +
 arch/arm64/mm/trans_pgd.c | 46 +
 drivers/firmware/efi/arm-runtime.c | 4 +
 drivers/firmware/efi/memmap.c | 20 +
 drivers/tty/serial/earlycon.c | 4 +
 drivers/usb/early/ehci-dbgp.c | 4 +
 fs/coredump.c | 8 +
 fs/exec.c | 20 +
 fs/nfs/flexfilelayout/flexfilelayout.c | 9 +
 fs/nfs/nfs4idmap.c | 9 +
 fs/nfsd/auth.c | 38 +
 fs/nfsd/nfs4callback.c | 12 +-
 fs/nfsd/nfs4recover.c | 9 +
 fs/nfsd/nfsfh.c | 9 +
 fs/open.c | 26 +
 fs/overlayfs/dir.c | 9 +
 fs/overlayfs/super.c | 12 +
 fs/smb/client/cifs_spnego.c | 9 +
 fs/smb/client/cifsacl.c | 9 +
 include/asm-generic/early_ioremap.h | 3 +
 include/asm-generic/fixmap.h | 18 +
 include/asm-generic/pgalloc.h | 54 +
 include/asm-generic/vmlinux.lds.h | 24 +-
 include/linux/cred.h | 45 +-
 include/linux/efi.h | 9 +
 include/linux/iee-func.h | 27 +
 include/linux/module.h | 1 +
 include/linux/sched.h | 19 +
 init/main.c | 28 +-
 kernel/cred.c | 182 +++
 kernel/exit.c | 8 +
 kernel/fork.c | 316 ++--
 kernel/groups.c | 7 +
 kernel/kthread.c | 13 +
 kernel/smpboot.c | 9 +
 kernel/sys.c | 107 ++
 kernel/umh.c | 10 +
 kernel/user_namespace.c | 18 +
 mm/Kconfig | 12 +
 mm/damon/ops-common.c | 1 +
 mm/debug_vm_pgtable.c | 24 +
 mm/early_ioremap.c | 57 +
 mm/huge_memory.c | 30 +-
 mm/init-mm.c | 17 +
 mm/memory.c | 14 +
 mm/slub.c | 198 ++-
 mm/sparse-vmemmap.c | 21 +
 mm/vmalloc.c | 2 +-
 net/dns_resolver/dns_key.c | 9 +
 security/commoncap.c | 169 +++
 security/keys/keyctl.c | 23 +
 security/keys/process_keys.c | 53 +
 security/security.c | 15 +
 109 files changed, 8945 insertions(+), 397 deletions(-)
 create mode 100644 arch/arm64/include/asm/iee-access.h
 create mode 100644 arch/arm64/include/asm/iee-cred.h
 create mode 100644 arch/arm64/include/asm/iee-def.h
 create mode 100644 arch/arm64/include/asm/iee-si.h
 create mode 100644 arch/arm64/include/asm/iee-slab.h
 create mode 100644 arch/arm64/include/asm/iee-token.h
 create mode 100644 arch/arm64/include/asm/iee.h
 create mode 100644 arch/arm64/include/asm/koi.h
 create mode 100644 arch/arm64/kernel/iee/Makefile
 create mode 100644 arch/arm64/kernel/iee/iee-func.c
 create mode 100644 arch/arm64/kernel/iee/iee-gate.S
 create mode 100644 arch/arm64/kernel/iee/iee.c
 create mode 100644 arch/arm64/kernel/koi/Makefile
 create mode 100644 arch/arm64/kernel/koi/koi.c
 create mode 100644 include/linux/iee-func.h

diff --git a/Makefile b/Makefile
index 8e6d9b894b1e..20c367b5957d 100644
--- a/Makefile
+++ b/Makefile
@@ -554,7 +554,7 @@ LINUXINCLUDE    := \
 		-I$(objtree)/include \
 		$(USERINCLUDE)
 
-KBUILD_AFLAGS   := -D__ASSEMBLY__ -fno-PIE
+KBUILD_AFLAGS   := -D__ASSEMBLY__ -fno-PIE -march=armv8.1-a
 
 KBUILD_CFLAGS :=
 KBUILD_CFLAGS += -std=gnu11
@@ -563,6 +563,7 @@ KBUILD_CFLAGS += -funsigned-char
 KBUILD_CFLAGS += -fno-common
 KBUILD_CFLAGS += -fno-PIE
 KBUILD_CFLAGS += -fno-strict-aliasing
+KBUILD_CFLAGS += -march=armv8.1-a
 
 KBUILD_CPPFLAGS := -D__KERNEL__
 KBUILD_RUSTFLAGS := $(rust_common_flags) \
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 2a875546bdc7..7448afc90c0a 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1730,6 +1730,24 @@ config UNMAP_KERNEL_AT_EL0
 
 	  If unsure, say Y.
 
+# Config for iee
+config IEE
+	depends on ARM64
+	depends on ARM64_PAN
+	depends on ARM64_VA_BITS_48
+	depends on ARM64_4K_PAGES
+	def_bool y
+
+# Config for support of interruption of iee
+config IEE_INTERRUPTABLE
+	depends on IEE
+	def_bool n
+
+# Config for credentials isolation
+config CREDP
+	depends on IEE
+	def_bool y
+
 config MITIGATE_SPECTRE_BRANCH_HISTORY
 	bool "Mitigate Spectre style attacks against branch history" if EXPERT
 	default y
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 38b23786aeb4..6af10d509c2e 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -26,6 +26,41 @@
 #include <asm/ptrace.h>
 #include <asm/thread_info.h>
 
+#ifdef CONFIG_IEE
+	.macro	iee_si_restore_daif, flags:req
+	msr	daifclr, #0xf
+	tbnz	\flags, #6, 114221f
+	tbnz	\flags, #7, 114210f
+	tbnz	\flags, #8, 114100f
+	msr	daifset, #0b000
+	b	114514f
+114221:
+	tbnz	\flags, #7, 114211f
+	tbnz	\flags, #8, 114101f
+	msr	daifset, #0b001
+	b	114514f
+114211:
+	tbnz	\flags, #8, 114111f
+	msr	daifset, #0b011
+	b	114514f
+114210:
+	tbnz	\flags, #8, 114110f
+	msr	daifset, #0b010
+	b	114514f
+114100:
+	msr	daifset, #0b100
+	b	114514f
+114101:
+	msr	daifset, #0b101
+	b	114514f
+114110:
+	msr	daifset, #0b110
+	b	114514f
+114111:
+	msr	daifset, #0b111
+114514:
+	.endm
+#endif
+
 /*
  * Provide a wxN alias for each wN register so what we can paste a xN
  * reference after a 'w' to obtain the 32-bit version.
@@ -52,7 +87,11 @@ alternative_else_nop_endif
 
 	.macro	disable_daif
 	disable_allint
+// #ifdef CONFIG_IEE
+//	msr	daifset, #0x7
+// #else
 	msr	daifset, #0xf
+// #endif
 	.endm
 
 	.macro	enable_daif
@@ -69,7 +108,11 @@ alternative_else_nop_endif
 	.endm
 
 	.macro	restore_irq, flags
+// #ifdef CONFIG_IEE
+//	iee_si_restore_daif \flags
+// #else
 	msr	daif, \flags
+// #endif
 	.endm
 
 	.macro	enable_dbg
@@ -77,20 +120,44 @@ alternative_else_nop_endif
 	.endm
 
 	.macro	disable_step_tsk, flgs, tmp
+// #ifdef CONFIG_IEE
+// 1145:
+//	tbz	\flgs, #TIF_SINGLESTEP, 9990f
+//	mrs	\tmp, mdscr_el1
+//	bic	\tmp, \tmp, #DBG_MDSCR_SS
+//	orr	\tmp, \tmp, #DBG_MDSCR_MDE
+//	msr	mdscr_el1, \tmp
+//	isb	// Synchronise with enable_dbg
+//	mrs	\tmp, mdscr_el1
+//	tbz	\tmp, #15, 1145b
+// #else
 	tbz	\flgs, #TIF_SINGLESTEP, 9990f
 	mrs	\tmp, mdscr_el1
 	bic	\tmp, \tmp, #DBG_MDSCR_SS
 	msr	mdscr_el1, \tmp
 	isb	// Synchronise with enable_dbg
+// #endif
 9990:
 	.endm
 
 	/* call with daif masked */
 	.macro	enable_step_tsk, flgs, tmp
+// #ifdef CONFIG_IEE
+// 1146:
+//	tbz	\flgs, #TIF_SINGLESTEP, 9990f
+//	mrs	\tmp, mdscr_el1
+//	orr	\tmp, \tmp, #DBG_MDSCR_SS
+//	orr	\tmp, \tmp, #DBG_MDSCR_MDE
+//	msr	mdscr_el1, \tmp
+//	isb	// Synchronise with enable_dbg
+//	mrs	\tmp, mdscr_el1
+//	tbz	\tmp, #15, 1146b
+// #else
 	tbz	\flgs, #TIF_SINGLESTEP, 9990f
 	mrs	\tmp, mdscr_el1
 	orr	\tmp, \tmp, #DBG_MDSCR_SS
 	msr	mdscr_el1, \tmp
+// #endif
 9990:
 	.endm
diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h
index 2417cc6b1631..cb5b4c2e03b8 100644
--- a/arch/arm64/include/asm/daifflags.h
+++ b/arch/arm64/include/asm/daifflags.h
@@ -26,11 +26,19 @@ static inline void local_daif_mask(void)
 		    (read_sysreg_s(SYS_ICC_PMR_EL1) == (GIC_PRIO_IRQOFF | GIC_PRIO_PSR_I_SET)));
 
+// #ifdef CONFIG_IEE
+//	asm volatile(
+//		"msr	daifset, #0x7	// local_daif_mask\n"
+//		:
+//		:
+//		: "memory");
+// #else
 	asm volatile(
 		"msr	daifset, #0xf	// local_daif_mask\n"
 		:
 		:
 		: "memory");
+// #endif
 
 	/* Don't really care for a dsb here, we don't intend to enable IRQs */
 	if (system_uses_irq_prio_masking())
@@ -118,7 +126,11 @@ static inline void local_daif_restore(unsigned long flags)
 			gic_write_pmr(pmr);
 	}
 
+// #ifdef CONFIG_IEE
+//	iee_si_write_daif(flags);
+// #else
 	write_sysreg(flags, daif);
+// #endif
 
 	/* If we can take asynchronous errors we can take NMIs */
 	if (system_uses_nmi()) {
@@ -151,7 +163,11 @@ static inline void local_daif_inherit(struct pt_regs *regs)
 	 * system_has_prio_mask_debugging() won't restore the I bit if it can
 	 * use the pmr instead.
 	 */
+// #ifdef CONFIG_IEE
+//	iee_si_write_daif(flags);
+// #else
 	write_sysreg(flags, daif);
+// #endif
 
 	/* The ALLINT field is at the same position in pstate and ALLINT */
 	if (system_uses_nmi()) {
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index bcd5622aa096..76c4bd6c2b20 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -58,7 +58,11 @@ void arch_efi_call_virt_teardown(void);
 #define arch_efi_save_flags(state_flags) \
 	((void)((state_flags) = read_sysreg(daif)))
 
+// #ifdef CONFIG_IEE
+// #define arch_efi_restore_flags(state_flags) iee_si_write_daif(state_flags)
+// #else
 #define arch_efi_restore_flags(state_flags) write_sysreg(state_flags, daif)
+// #endif
 
 /* arch specific definitions used by the stub code */
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index 58c294a96676..095a0731dce3 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -108,6 +108,9 @@ void __init fixmap_copy(pgd_t *pgdir);
 #define __late_clear_fixmap(idx) __set_fixmap((idx), 0, FIXMAP_PAGE_CLEAR)
 
 extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot);
+#ifdef CONFIG_PTP
+extern void __iee_set_fixmap_pre_init(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot);
+#endif
 
 #include <asm-generic/fixmap.h>
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index 84055329cd8b..f72d89bb9a32 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -104,6 +104,18 @@ static inline void decode_ctrl_reg(u32 reg,
 	write_sysreg(VAL, dbg##REG##N##_el1);\
 } while (0)
 
+#ifdef CONFIG_IEE
+#define IEE_SI_AARCH64_DBG_READ(N, REG, VAL) do{\
+	VAL = this_cpu_read(iee_si_user_##REG##N);\
+} while (0)
+
+#define IEE_SI_AARCH64_DBG_WRITE(N, REG, VAL) do{\
+	u64 __val = (u64)(VAL); \
+	this_cpu_write(iee_si_user_##REG##N, __val);\
+	iee_rwx_gate_entry(IEE_WRITE_AFSR0);\
+} while (0)
+#endif
+
 struct task_struct;
 struct notifier_block;
 struct perf_event_attr;
diff --git a/arch/arm64/include/asm/iee-access.h b/arch/arm64/include/asm/iee-access.h
new file mode 100644
index 000000000000..79604c21a510
--- /dev/null
+++ b/arch/arm64/include/asm/iee-access.h
@@ -0,0 +1,36 @@
+#ifndef _LINUX_IEE_ACCESS_H
+#define _LINUX_IEE_ACCESS_H
+
+#include <asm/iee-def.h>
+#include <asm/iee-slab.h>
+
+extern unsigned long long iee_rw_gate(int flag, ...);
+
+#ifdef CONFIG_IEE
+void iee_write_in_byte(void *ptr, u64 data, int length)
+{
+	iee_rw_gate(IEE_WRITE_IN_BYTE, ptr, data, length);
+}
+
+void iee_memset(void *ptr, int data, size_t n)
+{
+	iee_rw_gate(IEE_MEMSET, ptr, data, n);
+}
+
+void iee_set_track(struct track *ptr, struct track *data)
+{
+	iee_rw_gate(IEE_OP_SET_TRACK, ptr, data);
+}
+
+void iee_set_freeptr(freeptr_t *pptr, freeptr_t ptr)
+{
+	iee_rw_gate(IEE_OP_SET_FREEPTR, pptr, ptr);
+}
+
+void iee_write_entry_task(struct task_struct *tsk)
+{
+	iee_rw_gate(IEE_WRITE_ENTRY_TASK, tsk);
+}
+#endif
+
+#endif
\ No newline at end of file
diff --git a/arch/arm64/include/asm/iee-cred.h b/arch/arm64/include/asm/iee-cred.h
new file mode 100644
index 000000000000..b8c3bb53f98a
--- /dev/null
+++ b/arch/arm64/include/asm/iee-cred.h
@@ -0,0 +1,150 @@
+#ifndef _LINUX_IEE_CRED_H
+#define _LINUX_IEE_CRED_H
+
+#include <linux/cred.h>
+#include <asm/iee-def.h>
+
+extern unsigned long long iee_rw_gate(int flag, ...);
+
+#ifdef CONFIG_CREDP
+static void __maybe_unused iee_copy_cred(const struct cred *old, struct cred *new)
+{
+	iee_rw_gate(IEE_OP_COPY_CRED,old,new);
+}
+
+static void __maybe_unused iee_set_cred_uid(struct cred *cred, kuid_t uid)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_UID,cred,uid);
+}
+
+static void __maybe_unused iee_set_cred_gid(struct cred *cred, kgid_t gid)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_GID,cred,gid);
+}
+
+static void __maybe_unused iee_set_cred_suid(struct cred *cred, kuid_t suid)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_SUID,cred,suid);
+}
+
+static void __maybe_unused iee_set_cred_sgid(struct cred *cred, kgid_t sgid)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_SGID,cred,sgid);
+}
+
+static void __maybe_unused iee_set_cred_euid(struct cred *cred, kuid_t euid)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_EUID,cred,euid);
+}
+
+static void __maybe_unused iee_set_cred_egid(struct cred *cred, kgid_t egid)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_EGID,cred,egid);
+}
+
+static void __maybe_unused iee_set_cred_fsuid(struct cred *cred, kuid_t fsuid)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_FSUID,cred,fsuid);
+}
+
+static void __maybe_unused iee_set_cred_fsgid(struct cred *cred, kgid_t fsgid)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_FSGID,cred,fsgid);
+}
+
+static void __maybe_unused iee_set_cred_user(struct cred *cred, struct user_struct *user)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_USER,cred,user);
+}
+
+static void __maybe_unused iee_set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_USER_NS,cred,user_ns);
+}
+
+static void __maybe_unused iee_set_cred_ucounts(struct cred *cred, struct ucounts *ucounts)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_UCOUNTS,cred,ucounts);
+}
+
+static void __maybe_unused iee_set_cred_group_info(struct cred *cred, struct group_info *group_info)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_GROUP_INFO,cred,group_info);
+}
+
+static void __maybe_unused iee_set_cred_securebits(struct cred *cred, unsigned securebits)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_SECUREBITS,cred,securebits);
+}
+
+static void __maybe_unused iee_set_cred_cap_inheritable(struct cred *cred, kernel_cap_t cap_inheritable)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_CAP_INHER,cred,cap_inheritable);
+}
+
+static void __maybe_unused iee_set_cred_cap_permitted(struct cred *cred, kernel_cap_t cap_permitted)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_CAP_PERM,cred,cap_permitted);
+}
+
+static void __maybe_unused iee_set_cred_cap_effective(struct cred *cred, kernel_cap_t cap_effective)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_CAP_EFFECT,cred,cap_effective);
+}
+
+static void __maybe_unused iee_set_cred_cap_bset(struct cred *cred, kernel_cap_t cap_bset)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_CAP_BSET,cred,cap_bset);
+}
+
+static void __maybe_unused iee_set_cred_cap_ambient(struct cred *cred, kernel_cap_t cap_ambient)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_CAP_AMBIENT,cred,cap_ambient);
+}
+
+#ifdef CONFIG_KEYS
+static void __maybe_unused iee_set_cred_jit_keyring(struct cred *cred, unsigned char jit_keyring)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_JIT_KEYRING,cred,jit_keyring);
+}
+
+static void __maybe_unused iee_set_cred_session_keyring(struct cred *cred, struct key *session_keyring)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_SESS_KEYRING,cred,session_keyring);
+}
+
+static void __maybe_unused iee_set_cred_process_keyring(struct cred *cred, struct key *process_keyring)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_PROC_KEYRING,cred,process_keyring);
+}
+
+static void __maybe_unused iee_set_cred_thread_keyring(struct cred *cred, struct key *thread_keyring)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_THREAD_KEYRING,cred,thread_keyring);
+}
+
+static void __maybe_unused iee_set_cred_request_key_auth(struct cred *cred, struct key *request_key_auth)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_REQ_KEYRING,cred,request_key_auth);
+}
+#endif
+
+static void __maybe_unused iee_set_cred_atomic_set_usage(struct cred *cred, int i)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_ATSET_USAGE,cred,i);
+}
+
+#ifdef CONFIG_SECURITY
+static void __maybe_unused iee_set_cred_security(struct cred *cred, void *security)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_SECURITY,cred,security);
+}
+#endif
+
+static void __maybe_unused iee_set_cred_rcu(struct cred *cred, struct rcu_head *rcu)
+{
+	iee_rw_gate(IEE_OP_SET_CRED_RCU,cred,rcu);
+}
+#endif
+
+#endif
\ No newline at end of file
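[Editor's note, not part of the patch: each wrapper above replaces a direct field store on the now read-only struct cred. A call site that previously did `new->fsuid = uid;` would, under CONFIG_CREDP, route the store through the gate instead. A standalone model of that call-site change (the struct and gate body are stand-ins; only the names mirror the patch):

#include <assert.h>

struct cred { unsigned int fsuid; };    /* toy model of the real struct */

/* stand-in for iee_rw_gate(IEE_OP_SET_CRED_FSUID, ...) */
static void iee_rw_gate_store(unsigned int *field, unsigned int v) { *field = v; }

/* mirrors iee_set_cred_fsuid(): the only way to mutate the field */
static void iee_set_cred_fsuid(struct cred *cred, unsigned int fsuid)
{
    iee_rw_gate_store(&cred->fsuid, fsuid);
}

int main(void)
{
    struct cred c = { 0 };
    iee_set_cred_fsuid(&c, 1000u);   /* instead of c.fsuid = 1000; */
    assert(c.fsuid == 1000u);
    return 0;
}
End of editor's note.]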
diff --git a/arch/arm64/include/asm/iee-def.h b/arch/arm64/include/asm/iee-def.h
new file mode 100644
index 000000000000..76e59259e4d1
--- /dev/null
+++ b/arch/arm64/include/asm/iee-def.h
@@ -0,0 +1,74 @@
+// Function Identifiers with Parameters Description
+
+#define IEE_WRITE_IN_BYTE	0	// Parameters: void *ptr, __u64 data, int length
+#define IEE_OP_SET_PTE		1	// Parameters: pte_t *ptep, pte_t pte
+#define IEE_OP_SET_PMD		2	// Parameters: pmd_t *pmdp, pmd_t pmd
+#define IEE_OP_SET_PUD		3	// Parameters: pud_t *pudp, pud_t pud
+#define IEE_OP_SET_P4D		4	// Parameters: p4d_t *p4dp, p4d_t p4d
+#define IEE_OP_SET_BM_PTE	5	// Parameters: pte_t *ptep, pte_t pte
+#define IEE_OP_SET_SWAPPER_PGD	6	// Parameters: pgd_t *pgdp, pgd_t pgd
+#define IEE_OP_SET_TRAMP_PGD	7	// Parameters: pgd_t *pgdp, pgd_t pgd
+#define IEE_OP_SET_CMPXCHG	8	// Parameters: pte_t *ptep, pteval_t old_pteval, pteval_t new_pteval
+#define IEE_OP_SET_XCHG		9	// Parameters: pte_t *ptep, pteval_t pteval
+#define IEE_OP_COPY_CRED	10	// Parameters: struct cred *old, struct cred *new
+#define IEE_OP_SET_CRED_UID	11	// Parameters: struct cred *cred, kuid_t uid
+#define IEE_OP_SET_CRED_GID	12	// Parameters: struct cred *cred, kgid_t gid
+#define IEE_OP_SET_CRED_SUID	13	// Parameters: struct cred *cred, kuid_t suid
+#define IEE_OP_SET_CRED_SGID	14	// Parameters: struct cred *cred, kgid_t sgid
+#define IEE_OP_SET_CRED_EUID	15	// Parameters: struct cred *cred, kuid_t euid
+#define IEE_OP_SET_CRED_EGID	16	// Parameters: struct cred *cred, kgid_t egid
+#define IEE_OP_SET_CRED_FSUID	17	// Parameters: struct cred *cred, kuid_t fsuid
+#define IEE_OP_SET_CRED_FSGID	18	// Parameters: struct cred *cred, kgid_t fsgid
+#define IEE_OP_SET_CRED_USER	19	// Parameters: struct cred *cred, struct user_struct *user
+#define IEE_OP_SET_CRED_USER_NS	20	// Parameters: struct cred *cred, struct user_namespace *user_ns
+#define IEE_OP_SET_CRED_GROUP_INFO	21	// Parameters: struct cred *cred, struct group_info *group_info
+#define IEE_OP_SET_CRED_SECUREBITS	22	// Parameters: struct cred *cred, unsigned securebits
+#define IEE_OP_SET_CRED_CAP_INHER	23	// Parameters: struct cred *cred, kernel_cap_t cap_inheritable
+#define IEE_OP_SET_CRED_CAP_PERM	24	// Parameters: struct cred *cred, kernel_cap_t cap_permitted
+#define IEE_OP_SET_CRED_CAP_EFFECT	25	// Parameters: struct cred *cred, kernel_cap_t cap_effective
+#define IEE_OP_SET_CRED_CAP_BSET	26	// Parameters: struct cred *cred, kernel_cap_t cap_bset
+#define IEE_OP_SET_CRED_CAP_AMBIENT	27	// Parameters: struct cred *cred, kernel_cap_t cap_ambient
+#define IEE_OP_SET_CRED_JIT_KEYRING	28	// Parameters: struct cred *cred, unsigned char jit_keyring
+#define IEE_OP_SET_CRED_SESS_KEYRING	29	// Parameters: struct cred *cred, struct key *session_keyring
+#define IEE_OP_SET_CRED_PROC_KEYRING	30	// Parameters: struct cred *cred, struct key *process_keyring
+#define IEE_OP_SET_CRED_THREAD_KEYRING	31	// Parameters: struct cred *cred, struct key *thread_keyring
+#define IEE_OP_SET_CRED_REQ_KEYRING	32	// Parameters: struct cred *cred, struct key *request_key_auth
+#define IEE_OP_SET_CRED_NON_RCU	33	// Parameters: struct cred *cred, int non_rcu
+#define IEE_OP_SET_CRED_ATSET_USAGE	34	// Parameters: struct cred *cred, int i
+#define IEE_OP_SET_CRED_ATOP_USAGE	35	// Parameters: struct cred *cred, int flag
+#define IEE_OP_SET_CRED_SECURITY	36	// Parameters: struct cred *cred, void *security
+#define IEE_OP_SET_CRED_RCU	37	// Parameters: struct cred *cred, struct rcu_head *rcu
+#define IEE_MEMSET		38	// Parameters: void *ptr, int data, size_t n
+#define IEE_OP_SET_TRACK	39	// Parameters: struct track *ptr, struct track *data
+#define IEE_OP_SET_FREEPTR	40	// Parameters: void **pptr, void *ptr
+#define IEE_OP_SET_PTE_U	41	// Parameters: pte_t *ptep, pte_t pte
+#define IEE_OP_SET_PTE_P	42	// Parameters: pte_t *ptep, pte_t pte
+#define IEE_SET_TOKEN_MM	43	// Parameters: struct task_token *token, struct mm_struct *mm
+#define IEE_SET_TOKEN_PGD	44	// Parameters: struct task_token *token, pgd_t *pgd
+#define IEE_INIT_TOKEN		45	// Parameters: struct task_struct *tsk, void *kernel_stack, void *iee_stack
+#define IEE_FREE_TOKEN		46	// Parameters: struct task_struct *tsk
+#define IEE_READ_TOKEN_STACK	47	// Parameters: struct task_struct *tsk
+#define IEE_WRITE_ENTRY_TASK	48	// Parameters: struct task_struct *tsk
+#define IEE_OP_SET_CRED_UCOUNTS	49	// Parameters: struct cred *cred, struct ucounts *ucounts
+#ifdef CONFIG_KOI
+#define IEE_READ_KOI_STACK	50	// Parameters: struct task_struct *tsk
+#define IEE_WRITE_KOI_STACK	51	// Parameters: struct task_struct *tsk, unsigned long koi_stack
+#define IEE_READ_TOKEN_TTBR1	52	// Parameters: struct task_struct *tsk
+#define IEE_WRITE_TOKEN_TTBR1	53	// Parameters: struct task_struct *tsk, unsigned long current_ttbr1
+#define IEE_READ_KOI_KERNEL_STACK	54	// Parameters: struct task_struct *tsk
+#define IEE_WRITE_KOI_KERNEL_STACK	55	// Parameters: struct task_struct *tsk, unsigned long kernel_stack
+#define IEE_READ_KOI_STACK_BASE	56	// Parameters: struct task_struct *tsk
+#define IEE_WRITE_KOI_STACK_BASE	57	// Parameters: struct task_struct *tsk, unsigned long koi_stack_base
+#endif
+
+/* Add new IEE ops here */
+
+#define AT_ADD			1
+#define AT_INC_NOT_ZERO		2
+#define AT_SUB_AND_TEST		3
+/* Atomic ops for atomic_t */
+
+#ifdef CONFIG_KOI
+#define IEE_SWITCH_TO_KERNEL	7
+#define IEE_SWITCH_TO_KOI	8
+#endif
\ No newline at end of file
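[Editor's note, not part of the patch: the IDs above are a fixed contract between the inline wrappers and the gate's dispatcher, so renumbering silently breaks every caller. One common guard (an assumption, not code from this series) is a compile-time check that a handler table stays in sync with the ID list:

enum iee_op { IEE_WRITE_IN_BYTE = 0, IEE_OP_SET_PTE = 1, /* ... */ IEE_OP_NR };

typedef void (*iee_handler_t)(void);
static void h_write_in_byte(void) { /* handle IEE_WRITE_IN_BYTE */ }
static void h_set_pte(void)       { /* handle IEE_OP_SET_PTE */ }

static iee_handler_t handlers[] = {
    [IEE_WRITE_IN_BYTE] = h_write_in_byte,
    [IEE_OP_SET_PTE]    = h_set_pte,
};

/* fails to compile if an ID is added without a handler slot */
_Static_assert(sizeof(handlers) / sizeof(handlers[0]) == IEE_OP_NR,
               "IEE op table out of sync");

int main(void) { handlers[IEE_OP_SET_PTE](); return 0; }
End of editor's note.]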
diff --git a/arch/arm64/include/asm/iee-si.h b/arch/arm64/include/asm/iee-si.h
new file mode 100644
index 000000000000..e67d81db66a5
--- /dev/null
+++ b/arch/arm64/include/asm/iee-si.h
@@ -0,0 +1,64 @@
+#ifndef _LINUX_IEE_SI_H
+#define _LINUX_IEE_SI_H
+
+#include <asm/sysreg.h>
+
+#define __iee_si_code __section(".iee.si_text")
+#define __iee_si_data __section(".iee.si_data")
+
+/* Used for copying globals that iee rwx gate needs. */
+extern unsigned long iee_base_idmap_pg_dir;
+extern unsigned long iee_base_reserved_pg_dir;
+extern unsigned long iee_base__bp_harden_el1_vectors;
+extern bool iee_init_done;
+extern unsigned long iee_si_tcr;
+
+/* The following are __init functions used for iee si initialization. */
+extern void iee_si_prepare_data(void);
+
+extern unsigned long __iee_si_start[];
+
+// Handler function for sensitive inst
+u64 iee_si_handler(int flag, ...);
+
+/*
+ * TODO: scan a page to check whether it contains sensitive instructions
+ * return 1 when finding sensitive inst, 0 on safe page.
+ */
+extern int iee_si_scan_page(unsigned long addr);
+
+
+#define DBG_MDSCR_SS		(1 << 0)
+#define DBG_MDSCR_MDE		(1 << 15)
+
+#define IEE_SI_TEST		0
+#define IEE_WRITE_SCTLR		1
+#define IEE_WRITE_TTBR0		2
+#define IEE_WRITE_VBAR		3
+#define IEE_WRITE_TCR		4
+#define IEE_WRITE_MDSCR		5
+#define IEE_CONTEXT_SWITCH	6
+// #define IEE_WRITE_AFSR0 10
+
+/* Provide ttbr1 switch gate for KOI */
+#ifdef CONFIG_KOI
+#define IEE_SWITCH_TO_KERNEL	7
+#define IEE_SWITCH_TO_KOI	8
+#endif
+
+/* MASK modify-permitted bits on IEE protected sys registers */
+#define IEE_SCTLR_MASK	(SCTLR_EL1_CP15BEN | SCTLR_EL1_SED | SCTLR_EL1_UCT | SCTLR_EL1_UCI |\
+			SCTLR_EL1_BT0 | SCTLR_EL1_BT1 | SCTLR_EL1_TCF0_MASK | SCTLR_ELx_DSSBS |\
+			SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | SCTLR_ELx_ENDA | SCTLR_ELx_ENDB|\
+			SCTLR_EL1_SPINTMASK | SCTLR_EL1_NMI | SCTLR_EL1_TIDCP | SCTLR_EL1_MSCEn|\
+			SCTLR_ELx_ENTP2 | SCTLR_EL1_TCF_MASK)
+#define IEE_TTBR0_MASK	~0
+#define IEE_TTBR1_MASK	~0
+#define IEE_TCR_MASK	(TCR_HD | TCR_T0SZ_MASK | TCR_E0PD1)
+#define IEE_MDSCR_MASK	(DBG_MDSCR_SS | DBG_MDSCR_MDE)
+
+#define IEE_DBGBCR_BT	0b0000 << 20
+#define IEE_DBGBCR_SSC	0b00 << 14
+#define IEE_DBGBCR_HMC	0b1 << 13
+#define IEE_DBGBCR_BAS	0b1111 << 5
+#define IEE_DBGBCR_PMC	0b11 << 1
+#define IEE_DBGBCR_E	0b1
+#define IEE_DBGBCR	IEE_DBGBCR_BT | IEE_DBGBCR_SSC | IEE_DBGBCR_HMC | IEE_DBGBCR_BAS \
+			| IEE_DBGBCR_PMC | IEE_DBGBCR_E
+
+#endif
\ No newline at end of file
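[Editor's note, not part of the patch: the 114100/114514 label ladder used by iee_si_restore_daif (assembler.h above) and IEE_SI_WRITE_DAIF_SEL (sysreg.h below) is a branch-table encoding of a simple function: rebuild the DAIFSet immediate from the saved flags' A/I/F bits while always leaving D clear, so debug exceptions remain usable. Equivalent C:

#include <assert.h>

/* PSTATE bit positions as saved by `mrs %0, daif` */
#define PSR_F_BIT (1ul << 6)
#define PSR_I_BIT (1ul << 7)
#define PSR_A_BIT (1ul << 8)

/* DAIFSet immediate: bit 0 = F, bit 1 = I, bit 2 = A (bit 3 = D, never set) */
static unsigned daifset_imm(unsigned long flags)
{
    unsigned imm = 0;
    if (flags & PSR_F_BIT) imm |= 1u;
    if (flags & PSR_I_BIT) imm |= 2u;
    if (flags & PSR_A_BIT) imm |= 4u;
    return imm;
}

int main(void)
{
    assert(daifset_imm(PSR_I_BIT | PSR_F_BIT) == 3u); /* the #0b011 branch */
    assert(daifset_imm(0) == 0u);                     /* the #0b000 branch */
    return 0;
}
End of editor's note.]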
diff --git a/arch/arm64/include/asm/iee-slab.h b/arch/arm64/include/asm/iee-slab.h
new file mode 100644
index 000000000000..4f3c17c7da00
--- /dev/null
+++ b/arch/arm64/include/asm/iee-slab.h
@@ -0,0 +1,23 @@
+#ifndef _LINUX_IEE_SLAB_H
+#define _LINUX_IEE_SLAB_H
+/*
+ * Tracking user of a slab.
+ */
+#include <linux/stackdepot.h>
+
+#define TRACK_ADDRS_COUNT 16
+struct track {
+	unsigned long addr;	/* Called from address */
+#ifdef CONFIG_STACKDEPOT
+	depot_stack_handle_t handle;
+#endif
+	int cpu;		/* Was running on cpu */
+	int pid;		/* Pid context */
+	unsigned long when;	/* When did the operation occur */
+};
+
+enum track_item { TRACK_ALLOC, TRACK_FREE };
+
+typedef struct { unsigned long v; } freeptr_t;
+
+#endif
\ No newline at end of file
diff --git a/arch/arm64/include/asm/iee-token.h b/arch/arm64/include/asm/iee-token.h
new file mode 100644
index 000000000000..152474e1a187
--- /dev/null
+++ b/arch/arm64/include/asm/iee-token.h
@@ -0,0 +1,40 @@
+#ifndef _LINUX_IEE_TOKEN_H
+#define _LINUX_IEE_TOKEN_H
+
+#include <asm/iee-def.h>
+
+extern unsigned long long iee_rw_gate(int flag, ...);
+struct task_token;
+struct task_struct;
+struct mm_struct;
+
+#ifdef CONFIG_IEE
+void iee_set_token_mm(struct task_struct *tsk, struct mm_struct *mm)
+{
+	iee_rw_gate(IEE_SET_TOKEN_MM, tsk, mm);
+}
+
+void iee_set_token_pgd(struct task_struct *tsk, pgd_t *pgd)
+{
+	iee_rw_gate(IEE_SET_TOKEN_PGD, tsk, pgd);
+}
+
+void iee_init_token(struct task_struct *tsk, void *kernel_stack, void *iee_stack)
+{
+	iee_rw_gate(IEE_INIT_TOKEN, tsk, kernel_stack, iee_stack);
+}
+
+void iee_free_token(struct task_struct *tsk)
+{
+	iee_rw_gate(IEE_FREE_TOKEN, tsk);
+}
+
+unsigned long iee_read_token_stack(struct task_struct *tsk)
+{
+	unsigned long ret;
+	ret = iee_rw_gate(IEE_READ_TOKEN_STACK, tsk);
+	return ret;
+}
+#endif
+
+#endif
\ No newline at end of file
diff --git a/arch/arm64/include/asm/iee.h b/arch/arm64/include/asm/iee.h
new file mode 100644
index 000000000000..598f6d0b2626
--- /dev/null
+++ b/arch/arm64/include/asm/iee.h
@@ -0,0 +1,10 @@
+#ifndef _LINUX_IEE_H
+#define _LINUX_IEE_H
+
+#define __iee_code __section(".iee.text")
+#define __iee_header __section(".iee.text.header")
+
+u64 iee_dispatch(int flag, ...);
+
+#include <asm/iee-def.h>
+
+#endif
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 85d26143faa5..e7a3081ce285 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -118,4 +118,25 @@
 #define SWAPPER_RX_MMUFLAGS	(SWAPPER_RW_MMUFLAGS | PTE_RDONLY)
 #endif
 
+#ifdef CONFIG_IEE
+
+#ifdef CONFIG_ARM64_4K_PAGES	// zgcXXX: it has been deleted in 6.6.
+#define ARM64_SWAPPER_USES_SECTION_MAPS 1
+#else
+#define ARM64_SWAPPER_USES_SECTION_MAPS 0
+#endif
+
+#define SWAPPER_MM_MMUFLAGS	(PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)	// zgcXXX: warning: 6.6 delete this macro. should delete this line later.
+
+#define SWAPPER_PTE_FLAGS_IDMAP	(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED | PTE_RDONLY)
+#define SWAPPER_PMD_FLAGS_IDMAP	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S | PMD_SECT_RDONLY)
+
+#if ARM64_SWAPPER_USES_SECTION_MAPS
+#define SWAPPER_MM_MMUFLAGS_IDMAP	(PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS_IDMAP)
+#else
+#define SWAPPER_MM_MMUFLAGS_IDMAP	(PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS_IDMAP)
+#endif
+
+#endif
+
 #endif /* __ASM_KERNEL_PGTABLE_H */
diff --git a/arch/arm64/include/asm/koi.h b/arch/arm64/include/asm/koi.h
new file mode 100644
index 000000000000..48d9a1378a1d
--- /dev/null
+++ b/arch/arm64/include/asm/koi.h
@@ -0,0 +1,335 @@
+#include "linux/mm.h"
+#include "asm/current.h"
+#include "asm/pgtable-hwdef.h"
+#include "asm/pgtable-types.h"
+#include "asm/pgtable.h"
+#include "linux/mm_types.h"
+#include "linux/pgtable.h"
+#include "linux/printk.h"
+#include "linux/slab.h"
+#include "linux/string.h"
+#include <linux/sched.h>
+#include "linux/hashtable.h"
+#include "linux/module.h"
+#include "linux/vmalloc.h"
+#include "stacktrace.h"
+#include "asm/mmu.h"
+#ifdef CONFIG_IEE
+#include "asm/iee-si.h"
+#include "asm/iee-def.h"
+#endif
+
+#define HASH_TABLE_BIT 10
+#define HASH_TABLE_LEN (1 << HASH_TABLE_BIT)
+#define HASH_KEY_MASK ((1 << HASH_TABLE_BIT) - 1)
+
+#define MAX_VAR_NAME 64
+#define DRIVER_ISOLATION_VAR_ARRAY_SIZE 32
+#define DRIVER_ISOLATION_MAX_VAL 256
+
+extern struct hlist_head koi_mem_htbl[1024];
+extern spinlock_t koi_mem_htbl_spin_lock;
+extern unsigned long koi_swapper_ttbr1;
+extern s64 koi_offset;
+
+#ifdef CONFIG_IEE
+extern unsigned long long iee_rw_gate(int flag, ...);
+#endif
+
+DECLARE_PER_CPU(unsigned long[PAGE_SIZE / sizeof(unsigned long)],
+		koi_irq_current_ttbr1);
+
+/**
+ * struct koi_mem_hash_node -
+ * @mod: pointer to driver module
+ * @mem_list_head: free memory list head
+ * @ko_mm: mm_struct in each driver
+ * @pgdp: entry to Page Global Directory: pgd
+ * @node: hash linked list node
+ * @addr_htbl[1 << (HASH_TABLE_BIT)]:
+ * @rcu:
+ */
+struct koi_mem_hash_node {
+	struct module *mod;
+	struct list_head mem_list_head;
+	struct mm_struct *ko_mm;
+	pgd_t *pgdp;
+	unsigned long ko_ttbr1;
+	struct hlist_node node;
+	struct hlist_head addr_htbl[1 << (HASH_TABLE_BIT)];
+	struct rcu_head rcu;
+	// used to protect free mem list
+	spinlock_t spin_lock;
+	// used to protect addr hashtable
+	spinlock_t addr_htbl_spin_lock;
+};
+
+// describe the global shared var
+struct shared_variable_descriptor {
+	unsigned int id;
+	unsigned int type;
+	char name[MAX_VAR_NAME];
+	unsigned long offset;
+	unsigned int size;
+	unsigned int self_ptr_ids[DRIVER_ISOLATION_VAR_ARRAY_SIZE];
+};
+
+int koi_do_switch_to_kernel_pgtbl(void);
+
+int koi_copy_pagetable(struct mm_struct *ko_mm, pgd_t *koi_pg_dir,
+		       unsigned long addr, unsigned long end);
+
+void koi_create_pagetable(struct module *mod);
+
+void koi_map_kostack(struct module *mod);
+unsigned long koi_mem_alloc(struct module *mod, unsigned long orig_addr,
+			    unsigned long size);
+void koi_mem_free(struct module *mod, unsigned long addr, unsigned long size,
+		  bool is_const, int count, ...);
+void *koi_mem_lookup(struct module *mod, unsigned long addr);
+void koi_mem_free_callback(struct module *mod, unsigned long addr,
+			   unsigned long size, void (*func)(void *));
+void koi_map_mem(struct module *mod, unsigned long addr, unsigned long size);
+void koi_mem_free_to_user(struct module *mod, unsigned long addr,
+			  unsigned long size);
+
+unsigned long koi_ttbr_ctor(struct module *mod);
+extern void koi_do_switch_to_kernel_stack(void);
+extern void koi_do_switch_to_ko_stack(void);
+
+#define switch_pgtable(ttbr1) \
+	do { \
+		write_sysreg(ttbr1, ttbr1_el1); \
+		isb(); \
+		asm volatile(ALTERNATIVE("nop; nop; nop", \
+					 "ic iallu; dsb nsh; isb", \
+					 ARM64_WORKAROUND_CAVIUM_27456)); \
+	} while (0);
+
+#ifndef CONFIG_IEE
+#define koi_switch_to_ko() \
+	do { \
+		unsigned long flags, ko_ttbr1, cur_sp; \
+		unsigned long *ptr; \
+		struct task_token *token; \
+		asm volatile("mrs %0, daif\n" \
+			     "msr daifset, #2\n" \
+			     "isb\n" \
+			     "mov %1, sp\n" \
+			     : "=r"(flags), "=r"(cur_sp) \
+			     :); \
+		if (!on_irq_stack(cur_sp, NULL)) { \
+			koi_do_switch_to_ko_stack(); \
+			ko_ttbr1 = koi_ttbr_ctor(THIS_MODULE); \
+			token = (struct task_token *)((unsigned long)current + \
+						      koi_offset); \
+			token->current_ttbr1 = ko_ttbr1 & (~TTBR_ASID_MASK); \
+		} else { \
+			ko_ttbr1 = koi_ttbr_ctor(THIS_MODULE); \
+			ptr = SHIFT_PERCPU_PTR(koi_irq_current_ttbr1, \
+					       __kern_my_cpu_offset()); \
+			*ptr = ko_ttbr1 & ~(TTBR_ASID_MASK); \
+		} \
+		switch_pgtable(ko_ttbr1); \
+		asm volatile("msr daif, %0\n" \
+			     "isb\n" \
+			     : \
+			     : "r"(flags)); \
+	} while (0);
+
+#define koi_switch_to_kernel() \
+	do { \
+		unsigned long cur_sp, flags, asid; \
+		unsigned long *ptr; \
+		struct task_token *token; \
+		asm volatile("mrs %0, daif\n" \
+			     "msr daifset, #2\n" \
+			     "isb\n" \
+			     "mov %1, sp\n" \
+			     "mov %2, ttbr0_el1\n" \
+			     : "=r"(flags), "=r"(cur_sp), "=r"(asid) \
+			     :); \
+		asid &= ~USER_ASID_FLAG; \
+		asid &= TTBR_ASID_MASK; \
+		switch_pgtable(koi_swapper_ttbr1); \
+		if (!on_irq_stack(cur_sp, NULL)) { \
+			token = (struct task_token *)((unsigned long)current + \
+						      koi_offset); \
+			token->current_ttbr1 = koi_swapper_ttbr1; \
+			koi_do_switch_to_kernel_stack(); \
+		} else { \
+			ptr = SHIFT_PERCPU_PTR(koi_irq_current_ttbr1, \
+					       __kern_my_cpu_offset()); \
+			*ptr = koi_swapper_ttbr1; \
+		} \
+		asm volatile("msr daif, %0\n" \
+			     "isb\n" \
+			     : \
+			     : "r"(flags)); \
+	} while (0);
+#else
+#define koi_switch_to_ko() \
+	do { \
+		unsigned long cur_sp, flags, ko_ttbr1; \
+		unsigned long *ptr; \
+		asm volatile("mrs %0, daif\n" \
+			     "msr daifset, #2\n" \
+			     "isb\n" \
+			     "mov %1, sp\n" \
+			     : "=r"(flags), "=r"(cur_sp) \
+			     :); \
+		if (!on_irq_stack(cur_sp, NULL)) { \
+			koi_do_switch_to_ko_stack(); \
+			ko_ttbr1 = koi_ttbr_ctor(THIS_MODULE); \
+			iee_rw_gate(IEE_WRITE_TOKEN_TTBR1, current, \
+				    ko_ttbr1 &(~TTBR_ASID_MASK)); \
+		} else { \
+			ko_ttbr1 = koi_ttbr_ctor(THIS_MODULE); \
+			ptr = SHIFT_PERCPU_PTR(koi_irq_current_ttbr1, \
+					       __kern_my_cpu_offset()); \
+			*ptr = ko_ttbr1 & (~TTBR_ASID_MASK); \
+		} \
+		iee_rwx_gate_entry(IEE_SWITCH_TO_KOI, ko_ttbr1); \
+		asm volatile("msr daif, %0\n" \
+			     "isb\n" \
+			     : \
+			     : "r"(flags)); \
+	} while (0);
+
+#define koi_switch_to_kernel() \
+	do { \
+		unsigned long flags, cur_sp; \
+		unsigned long *ptr; \
+		asm volatile("mrs %0, daif\n" \
+			     "msr daifset, #2\n" \
+			     "isb\n" \
+			     "mov %1, sp\n" \
+			     : "=r"(flags), "=r"(cur_sp) \
+			     :); \
+		iee_rwx_gate_entry(IEE_SWITCH_TO_KERNEL); \
+		if (!on_irq_stack(cur_sp, NULL)) { \
+			iee_rw_gate(IEE_WRITE_TOKEN_TTBR1, current, \
+				    koi_swapper_ttbr1); \
+			koi_do_switch_to_kernel_stack(); \
+		} else { \
+			ptr = SHIFT_PERCPU_PTR(koi_irq_current_ttbr1, \
+					       __kern_my_cpu_offset()); \
+			*ptr = koi_swapper_ttbr1; \
+		} \
+		asm volatile("msr daif, %0\n" \
+			     "isb\n" \
+			     : \
+			     : "r"(flags)); \
+	} while (0);
+#endif
+
+// kzalloc function in driver space
+static __maybe_unused noinline void *
+koi_kzalloc_wrapper(struct module *mod, size_t size, gfp_t flags)
+{
+	int cnt = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+	void *addr;
+	struct koi_mem_hash_node *target = NULL;
+	koi_switch_to_kernel();
+	rcu_read_lock();
+	hash_for_each_possible_rcu (koi_mem_htbl, target, node,
+				    (unsigned long)mod) {
+		if (target->mod == mod) {
+			break;
+		}
+	}
+	rcu_read_unlock();
+	if (target == NULL) {
+		printk("mem node for module: %s not found\n", mod->name);
+		return NULL;
+	}
+
+	addr = kzalloc(size, flags);
+	koi_copy_pagetable(target->ko_mm, target->pgdp, (unsigned long)addr,
+			   (unsigned long)addr + PAGE_SIZE * cnt);
+	koi_switch_to_ko();
+	return addr;
+}
+
+// kmalloc function in driver space
+static __maybe_unused __always_inline void *
+koi_kmalloc_wrapper(struct module *mod, size_t size, gfp_t flags)
+{
+	int cnt = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+	void *addr;
+	struct koi_mem_hash_node *target = NULL;
+	koi_switch_to_kernel();
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu (koi_mem_htbl, target, node,
+				    (unsigned long)mod) {
+		if (target->mod == mod) {
+			break;
+		}
+	}
+	rcu_read_unlock();
+	if (target == NULL) {
+		printk("mem node for module: %s not found\n", mod->name);
+		return 0;
+	}
+
+	addr = kmalloc(cnt * PAGE_SIZE, flags);
+	koi_copy_pagetable(target->ko_mm, target->pgdp, (unsigned long)addr,
+			   (unsigned long)addr + PAGE_SIZE * cnt);
+	koi_switch_to_ko();
+	return (void *)addr;
+}
+
+// vmalloc function in driver space
+static __maybe_unused void *koi_vmalloc_wrapper(struct module *mod,
+						unsigned long size)
+{
+	int cnt = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+	void *addr;
+	struct koi_mem_hash_node *target = NULL;
+	koi_switch_to_kernel();
+	rcu_read_lock();
+	hash_for_each_possible_rcu (koi_mem_htbl, target, node,
+				    (unsigned long)mod) {
+		if (target->mod == mod) {
+			break;
+		}
+	}
+	rcu_read_unlock();
+	if (target == NULL) {
+		printk("mem node for module: %s not found\n", mod->name);
+		koi_switch_to_ko();
+		return 0;
+	}
+	addr = vmalloc(cnt * PAGE_SIZE);
+	koi_copy_pagetable(target->ko_mm, target->pgdp, (unsigned long)addr,
+			   (unsigned long)addr + PAGE_SIZE * cnt);
+	koi_switch_to_ko();
+	return addr;
+}
+
+// kmalloc_array function in driver space
+static __maybe_unused void *koi_kmalloc_array_wrapper(struct module *mod,
+						      size_t n, size_t size,
+						      gfp_t flags)
+{
+	int kpage;
+	void *addr;
+	struct koi_mem_hash_node *target = NULL;
+	koi_switch_to_kernel();
+	rcu_read_lock();
+	hash_for_each_possible_rcu (koi_mem_htbl, target, node,
+				    (unsigned long)mod) {
+		if (target->mod == mod) {
+			break;
+		}
+	}
+	rcu_read_unlock();
+	if (target == NULL) {
+		printk("mem node for module: %s not found\n", mod->name);
+		koi_switch_to_ko();
+		return 0;
+	}
+	kpage = (n * size + PAGE_SIZE - 1) / PAGE_SIZE;
+	n = (kpage * PAGE_SIZE) / size;
+	addr = kmalloc_array(n, size, flags);
+	koi_copy_pagetable(target->ko_mm, target->pgdp, (unsigned long)addr,
+			   (unsigned long)addr + PAGE_SIZE * kpage);
+	koi_switch_to_ko();
+	return addr;
+}
\ No newline at end of file
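[Editor's note, not part of the patch: all four koi_*alloc wrappers above share one skeleton — leave the driver's page table, look up the module's koi_mem_hash_node, allocate from the kernel allocator, mirror the new pages into the driver's page table with koi_copy_pagetable(), then switch back. Stub-level sketch of that flow (the stubs stand in for the patch's symbols):

#include <stdlib.h>
#include <stdio.h>

#define PAGE_SIZE 4096ul

static void koi_switch_to_kernel(void) { /* swap TTBR1 to the swapper pgd */ }
static void koi_switch_to_ko(void)     { /* swap TTBR1 back to the module pgd */ }
static void koi_copy_pagetable(void *start, void *end)
{
    /* map [start, end) into the driver's page table */
    (void)start; (void)end;
}

static void *koi_alloc(size_t size)
{
    size_t cnt = (size + PAGE_SIZE - 1) / PAGE_SIZE;
    void *addr;

    koi_switch_to_kernel();
    addr = calloc(1, size);            /* kzalloc() stand-in */
    if (addr)
        koi_copy_pagetable(addr, (char *)addr + cnt * PAGE_SIZE);
    koi_switch_to_ko();
    return addr;
}

int main(void) { void *p = koi_alloc(100); printf("%p\n", p); free(p); return 0; }
End of editor's note.]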
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index fde4186cc387..6309e5514a16 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -190,6 +190,13 @@ extern u64 vabits_actual;
 #endif
 
 extern s64 memstart_addr;
+
+#if defined(CONFIG_IEE) || defined(CONFIG_KOI)
+extern s64 memstart_addr_init;
+extern s64 iee_offset;
+#define LOGICAL_RANDOM (long long int)((long unsigned int)__va(memstart_addr_init) & (~PAGE_OFFSET))
+#endif
+
 /* PHYS_OFFSET - the physical address of the start of memory. */
 #define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; })
 
@@ -310,6 +317,23 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x);
 #define __phys_to_virt(x)	((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET)
 #define __phys_to_kimg(x)	((unsigned long)((x) + kimage_voffset))
 
+#ifdef CONFIG_KOI
+#define KOI_OFFSET ((unsigned long)BIT(vabits_actual - 2))
+#endif
+
+#ifdef CONFIG_IEE
+#ifdef CONFIG_IEE_OFFSET
+#define IEE_OFFSET ((CONFIG_IEE_OFFSET) - LOGICAL_RANDOM)
+#else
+#define IEE_OFFSET (((unsigned long)BIT(vabits_actual - 2)) - LOGICAL_RANDOM)
+#endif
+#define __phys_to_iee(x) (__phys_to_virt(x) + IEE_OFFSET)
+#define SET_UPAGE(x) __pgprot(pgprot_val(x) | PTE_USER)
+#define SET_PPAGE(x) __pgprot(pgprot_val(x) & (~PTE_USER))
+#define SET_INVALID(x) __pgprot(pgprot_val(x) & (~PTE_VALID))
+#define SET_NG(x) __pgprot(pgprot_val(x) | PTE_NG)
+#endif
+
 /*
  * Convert a page to/from a physical address
  */
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index a6fb325424e7..cca5994dabfb 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -24,6 +24,9 @@
 #include <asm/cputype.h>
 #include <asm/sysreg.h>
 #include <asm/tlbflush.h>
+#ifdef CONFIG_IEE
+#define INIT_ASID 0x2
+#endif
 
 extern bool rodata_full;
 
@@ -43,7 +46,12 @@ static inline void cpu_set_reserved_ttbr0_nosync(void)
 {
 	unsigned long ttbr = phys_to_ttbr(__pa_symbol(reserved_pg_dir));
 
+#ifdef CONFIG_IEE
+	ttbr |= FIELD_PREP(TTBR_ASID_MASK, 1);
+	iee_rwx_gate_entry(IEE_WRITE_ttbr0_el1, ttbr);
+#else
 	write_sysreg(ttbr, ttbr0_el1);
+#endif
 }
 
 static inline void cpu_set_reserved_ttbr0(void)
@@ -79,7 +87,11 @@ static inline void __cpu_set_tcr_t0sz(unsigned long t0sz)
 	tcr &= ~TCR_T0SZ_MASK;
 	tcr |= t0sz << TCR_T0SZ_OFFSET;
+#ifdef CONFIG_IEE
+	iee_rwx_gate_entry(IEE_WRITE_tcr_el1, tcr);
+#else
 	write_sysreg(tcr, tcr_el1);
+#endif
 	isb();
 }
 
@@ -144,7 +156,11 @@ static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz)
 	__cpu_set_tcr_t0sz(t0sz);
 
 	/* avoid cpu_switch_mm() and its SW-PAN and CNP interactions */
+	#ifdef CONFIG_IEE
+	iee_rwx_gate_entry(IEE_WRITE_ttbr0_el1, ttbr0);
+	#else
 	write_sysreg(ttbr0, ttbr0_el1);
+	#endif
 	isb();
 }
 
@@ -174,6 +190,10 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
 		ttbr1 |= TTBR_CNP_BIT;
 	}
 
+	#ifdef CONFIG_IEE
+	ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, ASID(current->active_mm));
+	#endif
+
 	replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);
 
 	__cpu_install_idmap(idmap);
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 237224484d0f..4e3304da8421 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -63,6 +63,10 @@ static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
 
+#ifdef CONFIG_KOI
+pgd_t *koi_pgd_alloc(void);
+#endif
+
 static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep, pmdval_t prot)
 {
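[Editor's note, not part of the patch: memory.h above gives every physical page a second kernel-VA alias, __phys_to_iee(x) = __phys_to_virt(x) + IEE_OFFSET, a quarter of the VA space away from the linear map (BIT(vabits_actual - 2)), corrected by LOGICAL_RANDOM for the randomized memstart. A toy-constant model of the arithmetic (the constants are illustrative only, not the kernel's values):

#include <stdint.h>
#include <assert.h>

#define PAGE_OFFSET 0xffff800000000000ull   /* toy 48-bit VA linear map base */
#define VA_BITS 48
#define IEE_OFFSET (1ull << (VA_BITS - 2))  /* toy: LOGICAL_RANDOM == 0 */

static uint64_t phys_to_virt(uint64_t pa) { return pa | PAGE_OFFSET; }
static uint64_t phys_to_iee(uint64_t pa)  { return phys_to_virt(pa) + IEE_OFFSET; }

int main(void)
{
    uint64_t pa = 0x40000000ull;
    /* the two aliases are a fixed distance apart for every page */
    assert(phys_to_iee(pa) - phys_to_virt(pa) == IEE_OFFSET);
    return 0;
}
End of editor's note.]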
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index e4944d517c99..7f60e568c964 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -84,6 +84,13 @@
 #define CONT_PMD_SIZE		(CONT_PMDS * PMD_SIZE)
 #define CONT_PMD_MASK		(~(CONT_PMD_SIZE - 1))
 
+#ifdef CONFIG_IEE
+#define PGD_APT_RO		(_AT(pudval_t, 1) << 62)
+#endif
+#define PGD_APT			(_AT(pudval_t, 1) << 61)
+#define PGD_PXN			(_AT(pudval_t, 1) << 59)
+#define PGD_UXN			(_AT(pudval_t, 1) << 60)
+
 /*
  * Hardware page table definitions.
  *
@@ -285,6 +292,10 @@
 #define TCR_TCMA0		(UL(1) << 57)
 #define TCR_TCMA1		(UL(1) << 58)
 
+#ifdef CONFIG_IEE
+#define TCR_HPD1		(UL(1) << 42)
+#endif
+
 /*
  * TTBR.
  */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 8d68d00de0a4..6f8d5b85bfd7 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -34,6 +34,9 @@
 #include <linux/mm_types.h>
 #include <linux/sched.h>
 #include <linux/page_table_check.h>
+#ifdef CONFIG_PTP
+#include <asm/iee.h>
+#endif
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
@@ -156,6 +159,30 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
 #define pud_access_permitted(pud, write) \
 	(pte_access_permitted(pud_pte(pud), (write)))
 
+#ifdef CONFIG_PTP
+static inline bool in_tramp_pgdir(void *addr);
+extern unsigned long long iee_rw_gate(int flag, ...);
+
+static void iee_set_tramp_pgd_pre_init(pgd_t *pgdp, pgd_t pgd)
+{
+	iee_rw_gate(IEE_OP_SET_TRAMP_PGD, pgdp, pgd);
+}
+
+static noinline pteval_t iee_set_xchg_relaxed(pte_t *ptep, pteval_t pteval)
+{
+	pteval_t ret;
+	ret = iee_rw_gate(IEE_OP_SET_XCHG, ptep, pteval);
+	return (pteval_t)ret;
+}
+
+static noinline pteval_t iee_set_cmpxchg_relaxed(pte_t *ptep, pteval_t old_pteval, pteval_t new_pteval)
+{
+	pteval_t ret;
+	ret = iee_rw_gate(IEE_OP_SET_CMPXCHG, ptep, old_pteval, new_pteval);
+	return (pteval_t)ret;
+}
+#endif
+
 static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
 {
 	pte_val(pte) &= ~pgprot_val(prot);
@@ -262,6 +289,64 @@ static inline pte_t pte_mkdevmap(pte_t pte)
 
 static inline void __set_pte(pte_t *ptep, pte_t pte)
 {
+#ifdef CONFIG_KOI
+	if (!pte_none(pte)) {
+		pte = __pte(pte_val(pte) | PTE_NG);
+	}
+#endif
+#ifdef CONFIG_PTP
+	iee_rw_gate(IEE_OP_SET_PTE, ptep, pte);
+	dsb(ishst);
+	isb();
+#else
+	WRITE_ONCE(*ptep, pte);
+
+	/*
+	 * Only if the new pte is valid and kernel, otherwise TLB maintenance
+	 * or update_mmu_cache() have the necessary barriers.
+	 */
+	if (pte_valid_not_user(pte)) {
+		dsb(ishst);
+		isb();
+	}
+#endif
+}
+
+
+#ifdef CONFIG_PTP
+static inline void iee_set_bm_pte(pte_t *ptep, pte_t pte)
+{
+	// If it is pre init, write once.
+	// Else, write once will cause exception. So it is safe.
+	unsigned long flags;
+	unsigned long res;
+	local_irq_save(flags);
+	asm volatile("at s1e1r, %0"::"r"(__phys_to_iee(__pa_symbol(ptep))));
+	isb();
+	res = read_sysreg(par_el1);
+	local_irq_restore(flags);
+	if(res & 0x1)
+		WRITE_ONCE(*ptep,pte);
+	else
+		iee_rw_gate(IEE_OP_SET_BM_PTE, ptep, pte);
+
+	/*
+	 * Only if the new pte is valid and kernel, otherwise TLB maintenance
+	 * or update_mmu_cache() have the necessary barriers.
+	 */
+	if (pte_valid_not_user(pte)) {
+		dsb(ishst);
+		isb();
+	}
+}
+
+static inline void iee_set_fixmap_pte_pre_init(pte_t *ptep, pte_t pte)
+{
+#ifdef CONFIG_KOI
+	if (!pte_none(pte)) {
+		pte = __pte(pte_val(pte) | PTE_NG);
+	}
+#endif
 	WRITE_ONCE(*ptep, pte);
 
 	/*
@@ -273,6 +358,7 @@ static inline void __set_pte(pte_t *ptep, pte_t pte)
 		isb();
 	}
 }
+#endif
 
 static inline pte_t __ptep_get(pte_t *ptep)
 {
@@ -546,6 +632,95 @@ static inline void __set_pte_at(struct mm_struct *mm,
 	__set_pte(ptep, pte);
 }
 
+#ifdef CONFIG_IEE
+static inline void iee_set_pte_upage(pte_t *ptep, pte_t pte)
+{
+#ifdef CONFIG_PTP
+	iee_rw_gate(IEE_OP_SET_PTE_U, ptep, pte);
+	dsb(ishst);
+	isb();
+#else
+	WRITE_ONCE(*ptep, pte);
+	if (pte_valid_not_user(pte)) {
+		dsb(ishst);
+		isb();
+	}
+#endif
+}
+
+static inline void iee_set_pte_ppage(pte_t *ptep, pte_t pte)
+{
+#ifdef CONFIG_PTP
+	iee_rw_gate(IEE_OP_SET_PTE_P, ptep, pte);
+#else
+	WRITE_ONCE(*ptep, pte);
+#endif
+	if (pte_valid_not_user(pte)) {
+		dsb(ishst);
+		isb();
+	}
+}
+#endif
+
+#ifdef CONFIG_PTP
+static inline void set_pmd(pmd_t *pmdp, pmd_t pmd);
+static inline void __set_pmd_at(struct mm_struct *mm, unsigned long addr,
+				pmd_t *pmdp, pmd_t pmd)
+{
+	if (pte_present(pmd_pte(pmd)) && pte_user_exec(pmd_pte(pmd)) && !pte_special(pmd_pte(pmd)))
+		__sync_icache_dcache(pmd_pte(pmd));
+
+	/*
+	 * If the PTE would provide user space access to the tags associated
+	 * with it then ensure that the MTE tags are synchronised. Although
+	 * pte_access_permitted() returns false for exec only mappings, they
+	 * don't expose tags (instruction fetches don't check tags).
+	 */
+	if (system_supports_mte() && pte_access_permitted(pmd_pte(pmd), false) &&
+	    !pte_special(pmd_pte(pmd)) && pte_tagged(pmd_pte(pmd)))
+		mte_sync_tags(pmd_pte(pmd), PMD_SIZE >> PAGE_SHIFT);
+
+	__check_safe_pte_update(mm, (pte_t *)pmdp, pmd_pte(pmd));
+
+	set_pmd(pmdp, pmd);
+}
+
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+			      pmd_t *pmdp, pmd_t pmd)
+{
+	page_table_check_pmd_set(mm, pmdp, pmd);
+	return __set_pmd_at(mm, addr, pmdp, pmd);
+}
+
+static inline void set_pud(pud_t *pudp, pud_t pud);
+static inline void __set_pud_at(struct mm_struct *mm, unsigned long addr,
+				pud_t *pudp, pud_t pud)
+{
+	if (pte_present(pud_pte(pud)) && pte_user_exec(pud_pte(pud)) && !pte_special(pud_pte(pud)))
+		__sync_icache_dcache(pud_pte(pud));
+
+	/*
+	 * If the PTE would provide user space access to the tags associated
+	 * with it then ensure that the MTE tags are synchronised. Although
+	 * pte_access_permitted() returns false for exec only mappings, they
+	 * don't expose tags (instruction fetches don't check tags).
+	 */
+	if (system_supports_mte() && pte_access_permitted(pud_pte(pud), false) &&
+	    !pte_special(pud_pte(pud)) && pte_tagged(pud_pte(pud)))
+		mte_sync_tags(pud_pte(pud), PUD_SIZE >> PAGE_SHIFT);
+
+	__check_safe_pte_update(mm, (pte_t *)pudp, pud_pte(pud));
+
+	set_pud(pudp, pud);
+}
+
+static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
+			      pud_t *pudp, pud_t pud)
+{
+	page_table_check_pud_set(mm, pudp, pud);
+	return __set_pud_at(mm, addr, pudp, pud);
+}
+#else
 static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 			      pmd_t *pmdp, pmd_t pmd)
 {
@@ -561,7 +736,7 @@ static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
 	return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud),
 			    PUD_SIZE >> PAGE_SHIFT);
 }
-
+#endif
 
 #define __p4d_to_phys(p4d)	__pte_to_phys(p4d_pte(p4d))
 #define __phys_to_p4d_val(phys)	__phys_to_pte_val(phys)
@@ -640,7 +815,14 @@ static inline bool in_swapper_pgdir(void *addr)
 		((unsigned long)swapper_pg_dir & PAGE_MASK);
 }
 
-static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
+#ifdef CONFIG_PTP
+static inline bool in_tramp_pgdir(void *addr)
+{
+	return ((unsigned long)addr & PAGE_MASK) ==
+		((unsigned long)tramp_pg_dir & PAGE_MASK);
+}
+
+static inline void iee_set_fixmap_pmd_pre_init(pmd_t *pmdp, pmd_t pmd)
 {
 #ifdef __PAGETABLE_PMD_FOLDED
 	if (in_swapper_pgdir(pmdp)) {
@@ -648,7 +830,6 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 		return;
 	}
 #endif /* __PAGETABLE_PMD_FOLDED */
-
 	WRITE_ONCE(*pmdp, pmd);
 
 	if (pmd_valid(pmd)) {
@@ -656,6 +837,32 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 		isb();
 	}
 }
+#endif
+
+static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+#ifdef __PAGETABLE_PMD_FOLDED
+	if (in_swapper_pgdir(pmdp)) {
+		set_swapper_pgd((pgd_t *)pmdp, __pgd(pmd_val(pmd)));
+		return;
+	}
+#endif /* __PAGETABLE_PMD_FOLDED */
+#ifdef CONFIG_KOI
+	pmdval_t val = pmd_val(pmd);
+	if (pmd_valid(pmd) && !(val & PMD_TABLE_BIT)) {
+		pmd = __pmd(val | PMD_SECT_NG);
+	}
+#endif
+#ifdef CONFIG_PTP
+	iee_rw_gate(IEE_OP_SET_PMD, pmdp, pmd);
+#else
+	WRITE_ONCE(*pmdp, pmd);
+#endif
+	if (pmd_valid(pmd)) {
+		dsb(ishst);
+		isb();
+	}
+}
 
 static inline void pmd_clear(pmd_t *pmdp)
 {
@@ -675,6 +882,12 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
 /* Find an entry in the third-level page table. */
 #define pte_offset_phys(dir,addr)	(pmd_page_paddr(READ_ONCE(*(dir))) + pte_index(addr) * sizeof(pte_t))
 
+#ifdef CONFIG_PTP
+#define pte_set_fixmap_init(addr)	((pte_t *)iee_set_fixmap_offset_pre_init(FIX_PTE, addr))
+#define pte_set_fixmap_offset_init(pmd, addr)	pte_set_fixmap_init(pte_offset_phys(pmd, addr))
+#define pte_clear_fixmap_init()		clear_fixmap_init(FIX_PTE)
+#endif
+
 #define pte_set_fixmap(addr)		((pte_t *)set_fixmap_offset(FIX_PTE, addr))
 #define pte_set_fixmap_offset(pmd, addr)	pte_set_fixmap(pte_offset_phys(pmd, addr))
 #define pte_clear_fixmap()		clear_fixmap(FIX_PTE)
@@ -703,7 +916,9 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
 #define pud_user(pud)		pte_user(pud_pte(pud))
 #define pud_user_exec(pud)	pte_user_exec(pud_pte(pud))
 
-static inline void set_pud(pud_t *pudp, pud_t pud)
+
+#ifdef CONFIG_PTP
+static inline void iee_set_fixmap_pud_pre_init(pud_t *pudp, pud_t pud)
 {
 #ifdef __PAGETABLE_PUD_FOLDED
 	if (in_swapper_pgdir(pudp)) {
@@ -711,7 +926,6 @@ static inline void set_pud(pud_t *pudp, pud_t pud)
 		return;
 	}
 #endif /* __PAGETABLE_PUD_FOLDED */
-
 	WRITE_ONCE(*pudp, pud);
 
 	if (pud_valid(pud)) {
@@ -719,6 +933,33 @@ static inline void set_pud(pud_t *pudp, pud_t pud)
 		isb();
 	}
 }
+#endif
+
+static inline void set_pud(pud_t *pudp, pud_t pud)
+{
+#ifdef __PAGETABLE_PUD_FOLDED
+	if (in_swapper_pgdir(pudp)) {
+		set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud)));
+		return;
+	}
+#endif /* __PAGETABLE_PUD_FOLDED */
+#ifdef CONFIG_KOI
+	pudval_t val = pud_val(pud);
+	if (pud_valid(pud) && !(val & PUD_TABLE_BIT)) {
+		// There is no PUD_SEC_NG, so we use PMD_SECT_NG instead.
+		pud = __pud(val | PMD_SECT_NG);
+	}
+#endif
+#ifdef CONFIG_PTP
+	iee_rw_gate(IEE_OP_SET_PUD, pudp, pud);
+#else
+	WRITE_ONCE(*pudp, pud);
+#endif
+	if (pud_valid(pud)) {
+		dsb(ishst);
+		isb();
+	}
+}
 
 static inline void pud_clear(pud_t *pudp)
 {
@@ -738,6 +979,12 @@ static inline pmd_t *pud_pgtable(pud_t pud)
 /* Find an entry in the second-level page table. */
 #define pmd_offset_phys(dir, addr)	(pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t))
 
+#ifdef CONFIG_PTP
+#define pmd_set_fixmap_init(addr)	((pmd_t *)iee_set_fixmap_offset_pre_init(FIX_PMD, addr))
+#define pmd_set_fixmap_offset_init(pud, addr)	pmd_set_fixmap_init(pmd_offset_phys(pud, addr))
+#define pmd_clear_fixmap_init()		clear_fixmap_init(FIX_PMD)
+#endif
+
 #define pmd_set_fixmap(addr)		((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
 #define pmd_set_fixmap_offset(pud, addr)	pmd_set_fixmap(pmd_offset_phys(pud, addr))
 #define pmd_clear_fixmap()		clear_fixmap(FIX_PMD)
@@ -769,15 +1016,26 @@ static inline pmd_t *pud_pgtable(pud_t pud)
 #define p4d_none(p4d)		(!p4d_val(p4d))
 #define p4d_bad(p4d)		(!(p4d_val(p4d) & 2))
 #define p4d_present(p4d)	(p4d_val(p4d))
+#define p4d_valid(p4d)		pte_valid(p4d_pte(p4d))
 
 static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
 {
-	if (in_swapper_pgdir(p4dp)) {
+	if (in_swapper_pgdir(p4dp))
+	{
 		set_swapper_pgd((pgd_t *)p4dp, __pgd(p4d_val(p4d)));
 		return;
 	}
 
+#ifdef CONFIG_PTP
+	if(in_tramp_pgdir(p4dp))
+	{
+		iee_set_tramp_pgd_pre_init((pgd_t *)p4dp, __pgd(p4d_val(p4d)));
+		return;
+	}
+	iee_rw_gate(IEE_OP_SET_P4D, p4dp, p4d);
+#else
 	WRITE_ONCE(*p4dp, p4d);
+#endif
 	dsb(ishst);
 	isb();
 }
@@ -800,6 +1058,12 @@ static inline pud_t *p4d_pgtable(p4d_t p4d)
 /* Find an entry in the first-level page table. */
 #define pud_offset_phys(dir, addr)	(p4d_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t))
 
+#ifdef CONFIG_PTP
+#define pud_set_fixmap_init(addr)	((pud_t *)iee_set_fixmap_offset_pre_init(FIX_PUD, addr))
+#define pud_set_fixmap_offset_init(p4d, addr)	pud_set_fixmap_init(pud_offset_phys(p4d, addr))
+#define pud_clear_fixmap_init()		clear_fixmap_init(FIX_PUD)
+#endif
+
 #define pud_set_fixmap(addr)		((pud_t *)set_fixmap_offset(FIX_PUD, addr))
 #define pud_set_fixmap_offset(p4d, addr)	pud_set_fixmap(pud_offset_phys(p4d, addr))
 #define pud_clear_fixmap()		clear_fixmap(FIX_PUD)
@@ -826,6 +1090,10 @@ static inline pud_t *p4d_pgtable(p4d_t p4d)
 #define pgd_ERROR(e)	\
 	pr_err("%s:%d: bad pgd %016llx.\n", __FILE__, __LINE__, pgd_val(e))
 
+#ifdef CONFIG_PTP
+#define pgd_set_fixmap_init(addr)	((pgd_t *)iee_set_fixmap_offset_pre_init(FIX_PGD, addr))
+#define pgd_clear_fixmap_init()		clear_fixmap_init(FIX_PGD)
+#endif
 #define pgd_set_fixmap(addr)	((pgd_t *)set_fixmap_offset(FIX_PGD, addr))
 #define pgd_clear_fixmap()	clear_fixmap(FIX_PGD)
 
@@ -912,8 +1180,13 @@ static inline int __ptep_test_and_clear_young(struct vm_area_struct *vma,
 	do {
 		old_pte = pte;
 		pte = pte_mkold(pte);
+		#ifdef CONFIG_PTP
+		pte_val(pte) = iee_set_cmpxchg_relaxed(ptep,
+					pte_val(old_pte), pte_val(pte));
+		#else
 		pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
 					       pte_val(old_pte), pte_val(pte));
+		#endif
 	} while (pte_val(pte) != pte_val(old_pte));
 
 	return pte_young(pte);
@@ -952,8 +1225,12 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 static inline pte_t __ptep_get_and_clear(struct mm_struct *mm,
 				       unsigned long address, pte_t *ptep)
 {
+	#ifdef CONFIG_PTP
+	pteval_t pteval= iee_set_xchg_relaxed((pte_t *)&pte_val(*ptep), (pteval_t)0);
+	pte_t pte = __pte(pteval);
+	#else
 	pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0));
-
+	#endif
 	page_table_check_pte_clear(mm, pte);
 
 	return pte;
@@ -995,7 +1272,12 @@ static inline pte_t __get_and_clear_full_ptes(struct mm_struct *mm,
 static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 					    unsigned long address, pmd_t *pmdp)
 {
+	#ifdef CONFIG_PTP
+	pteval_t pteval= iee_set_xchg_relaxed((pte_t *)&pmd_val(*pmdp), (pteval_t)0);
+	pmd_t pmd = __pmd(pteval);
+	#else
 	pmd_t pmd = __pmd(xchg_relaxed(&pmd_val(*pmdp), 0));
+	#endif
 
 	page_table_check_pmd_clear(mm, pmd);
 
@@ -1012,8 +1294,12 @@ static inline void ___ptep_set_wrprotect(struct mm_struct *mm,
 	do {
 		old_pte = pte;
 		pte = pte_wrprotect(pte);
+		#ifdef CONFIG_PTP
+		pte_val(pte) = iee_set_cmpxchg_relaxed(ptep,pte_val(old_pte), pte_val(pte));
+		#else
 		pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
 					       pte_val(old_pte), pte_val(pte));
+		#endif
 	} while (pte_val(pte) != pte_val(old_pte));
 }
 
@@ -1049,7 +1335,11 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
 		unsigned long address, pmd_t *pmdp, pmd_t pmd)
 {
 	page_table_check_pmd_set(vma->vm_mm, pmdp, pmd);
+	#ifdef CONFIG_PTP
+	return __pmd((pmdval_t)iee_set_xchg_relaxed((pte_t *)&pmd_val(*pmdp), (pmdval_t)pmd_val(pmd)));
+	#else
 	return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd)));
+	#endif
 }
 #endif
diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h
index d2e0306e65d3..8352e92d4536 100644
--- a/arch/arm64/include/asm/pointer_auth.h
+++ b/arch/arm64/include/asm/pointer_auth.h
@@ -108,8 +108,13 @@ static __always_inline void ptrauth_enable(void)
 {
 	if (!system_supports_address_auth())
 		return;
+	#ifdef CONFIG_IEE
+	sysreg_clear_set_iee_si(sctlr_el1, 0, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB |
+					       SCTLR_ELx_ENDA | SCTLR_ELx_ENDB));
+	#else
 	sysreg_clear_set(sctlr_el1, 0, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB |
 					SCTLR_ELx_ENDA | SCTLR_ELx_ENDB));
+	#endif
 	isb();
 }
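[Editor's note, not part of the patch: sysreg_clear_set_iee_si (defined in the sysreg.h hunk below) keeps the read-modify-write shape of the stock sysreg_clear_set() but routes the final value through iee_rwx_gate_entry(); the IEE_*_MASK constants in iee-si.h list the bits a caller may change. A sketch of that enforcement — the gate-side check is an assumption inferred from the masks, not code visible in this file:

#include <stdint.h>
#include <assert.h>

#define IEE_MDSCR_MASK ((1u << 0) | (1u << 15)) /* SS | MDE, per iee-si.h */

/* gate side: fold only modify-permitted bits into the live register value */
static uint64_t gate_write(uint64_t live, uint64_t requested, uint64_t mask)
{
    return (live & ~mask) | (requested & mask);
}

int main(void)
{
    /* request flips SS plus a forbidden bit; only SS lands */
    uint64_t v = gate_write(0, (1u << 0) | (1u << 20), IEE_MDSCR_MASK);
    assert(v == 1u);
    return 0;
}
End of editor's note.]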
SCTLR_ELx_ENDB)); + #else sysreg_clear_set(sctlr_el1, 0, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | SCTLR_ELx_ENDA | SCTLR_ELx_ENDB)); + #endif isb(); } diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 42358b8d678e..1e2d11e57fe3 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1116,6 +1116,64 @@ write_sysreg_s(__scs_new, sysreg); \ } while (0) + +#ifdef CONFIG_IEE + +#define SYS_TCR_IEE_SI TCR_HPD1 | TCR_A1 + +extern void iee_rwx_gate_entry(int flag, ...); +#define IEE_SI_TEST 0 +#define IEE_WRITE_sctlr_el1 1 +#define IEE_WRITE_ttbr0_el1 2 +#define IEE_WRITE_vbar_el1 3 +#define IEE_WRITE_tcr_el1 4 +#define IEE_WRITE_mdscr_el1 5 +#define IEE_WRITE_AFSR0 10 + +#define sysreg_clear_set_iee_si(sysreg, clear, set) do { \ + u64 __scs_val = read_sysreg(sysreg); \ + u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \ + if (__scs_new != __scs_val) \ + iee_rwx_gate_entry(IEE_WRITE_##sysreg, __scs_new); \ +} while (0) + +#define IEE_SI_WRITE_DAIF_SEL "msr daifclr, #0xf\n\t" \ + "tbnz %x0, #6, 114221f\n\t" \ + "tbnz %x0, #7, 114210f\n\t" \ + "tbnz %x0, #8, 114100f\n\t" \ + "msr daifset, #0b000\n\t" \ + "b 114514f\n\t" \ +"114221:\n\t" \ + "tbnz %x0, #7, 114211f\n\t" \ + "tbnz %x0, #8, 114101f\n\t" \ + "msr daifset, #0b001\n\t" \ + "b 114514f\n\t" \ +"114211:\n\t" \ + "tbnz %x0, #8, 114111f\n\t" \ + "msr daifset, #0b011\n\t" \ + "b 114514f\n\t" \ +"114210:\n\t" \ + "tbnz %x0, #8, 114110f\n\t" \ + "msr daifset, #0b010\n\t" \ + "b 114514f\n\t" \ +"114100:\n\t" \ + "msr daifset, #0b100\n\t" \ + "b 114514f\n\t" \ +"114101:\n\t" \ + "msr daifset, #0b101\n\t" \ + "b 114514f\n\t" \ +"114110:\n\t" \ + "msr daifset, #0b110\n\t" \ + "b 114514f\n\t" \ +"114111:\n\t" \ + "msr daifset, #0b111\n\t" \ +"114514:\n\t" + +#define iee_si_write_daif(v) do { \ + u64 __val = (u64)(v); \ + asm volatile(IEE_SI_WRITE_DAIF_SEL: : "rZ" (__val));} while (0) +#endif + #define read_sysreg_par() ({ \ u64 par; \ asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412)); \ diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 2c29239d05c3..955f99317790 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -11,8 +11,17 @@ #include <linux/pagemap.h> #include <linux/swap.h> +#ifdef CONFIG_PTP +#include <linux/iee-func.h> +#endif + static inline void __tlb_remove_table(void *_table) { +#ifdef CONFIG_PTP + unsigned long iee_addr = __phys_to_iee(page_to_phys((struct page *)_table)); + set_iee_page_invalid(iee_addr); + iee_set_logical_mem_rw((unsigned long)page_address((struct page *)_table)); +#endif free_page_and_swap_cache((struct page *)_table); } diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 831c314d75ff..7775628528c6 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -49,6 +49,7 @@ #define __tlbi(op, ...) 
__TLBI_N(op, ##__VA_ARGS__, 1, 0) + #define __tlbi_user(op, arg) do { \ if (arm64_kernel_unmapped_at_el0()) \ __tlbi(op, (arg) | USER_ASID_FLAG); \ @@ -258,6 +259,10 @@ static inline void flush_tlb_mm(struct mm_struct *mm) asid = __TLBI_VADDR(0, ASID(mm)); __tlbi(aside1is, asid); __tlbi_user(aside1is, asid); + #if defined(CONFIG_IEE) || defined (CONFIG_KOI) + if (!arm64_kernel_unmapped_at_el0()) + __tlbi(aside1is, asid | USER_ASID_FLAG); + #endif dsb(ish); mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL); } @@ -273,6 +278,10 @@ static inline void __flush_tlb_page_nosync(struct mm_struct *mm, __tlbi_user(vale1is, addr); mmu_notifier_arch_invalidate_secondary_tlbs(mm, uaddr & PAGE_MASK, (uaddr & PAGE_MASK) + PAGE_SIZE); + #if defined(CONFIG_IEE) || defined(CONFIG_KOI) + if (!arm64_kernel_unmapped_at_el0()) + __tlbi(vale1is, addr | USER_ASID_FLAG); + #endif } static inline void flush_tlb_page_nosync(struct vm_area_struct *vma, @@ -366,6 +375,45 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) * 2. If there is 1 page remaining, flush it through non-range operations. Range * operations can only span an even number of pages. */ +#if defined(CONFIG_IEE) || defined(CONFIG_KOI) +#define __flush_tlb_range_op(op, start, pages, stride, \ + asid, tlb_level, tlbi_user) \ +do { \ + int num = 0; \ + int scale = 0; \ + unsigned long addr; \ + \ + while (pages > 0) { \ + if (!system_supports_tlb_range() || \ + pages % 2 == 1) { \ + addr = __TLBI_VADDR(start, asid); \ + __tlbi_level(op, addr, tlb_level); \ + if (!arm64_kernel_unmapped_at_el0()) /* added for IEE */ \ + __tlbi_level(op, addr | USER_ASID_FLAG, tlb_level); \ + if (tlbi_user) \ + __tlbi_user_level(op, addr, tlb_level); \ + start += stride; \ + pages -= stride >> PAGE_SHIFT; \ + continue; \ + } \ + \ + num = __TLBI_RANGE_NUM(pages, scale); \ + if (num >= 0) { \ + addr = __TLBI_VADDR_RANGE(start, asid, scale, \ + num, tlb_level); \ + __tlbi(r##op, addr); \ + if (!arm64_kernel_unmapped_at_el0()) /* added for IEE */ \ + __tlbi(r##op, addr | USER_ASID_FLAG); \ + if (tlbi_user) \ + __tlbi_user(r##op, addr); \ + start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ + pages -= __TLBI_RANGE_PAGES(num, scale); \ + } \ + scale++; \ + } \ +} while (0) + +#else #define __flush_tlb_range_op(op, start, pages, stride, \ asid, tlb_level, tlbi_user) \ do { \ @@ -399,6 +447,8 @@ do { \ } \ } while (0) +#endif //if defined(CONFIG_IEE) || defined(CONFIG_KOI) + #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \ __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false) @@ -467,7 +517,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end return; } - start = __TLBI_VADDR(start, 0); + start = __TLBI_VADDR(start, 0); end = __TLBI_VADDR(end, 0); dsb(ishst); @@ -483,9 +533,9 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end */ static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr) { - unsigned long addr = __TLBI_VADDR(kaddr, 0); - - dsb(ishst); + unsigned long addr = __TLBI_VADDR(kaddr, 0); + + dsb(ishst); __tlbi(vaae1is, addr); dsb(ish); isb(); diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 21ef9c21a400..44eb76cc54d7 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -36,6 +36,8 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ syscall.o proton-pack.o idreg-override.o idle.o \ patching.o +obj-y += iee/ +obj-$(CONFIG_KOI) += koi/ obj-$(CONFIG_AARCH32_EL0) += 
binfmt_elf32.o sys32.o signal32.o \
					   sys_compat.o
 obj-$(CONFIG_AARCH32_EL0)		+= sigreturn32.o
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index fd0f291e215e..c008e46b5fc0 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -306,11 +306,19 @@ static int cp15barrier_handler(struct pt_regs *regs, u32 instr)
 
 static int cp15_barrier_set_hw_mode(bool enable)
 {
+#ifdef CONFIG_IEE
+	if (enable)
+		sysreg_clear_set_iee_si(sctlr_el1, 0, SCTLR_EL1_CP15BEN);
+	else
+		sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_CP15BEN, 0);
+	return 0;
+#else
 	if (enable)
 		sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_CP15BEN);
 	else
 		sysreg_clear_set(sctlr_el1, SCTLR_EL1_CP15BEN, 0);
 	return 0;
+#endif
 }
 
 static bool try_emulate_cp15_barrier(struct pt_regs *regs, u32 insn)
@@ -341,11 +349,19 @@ static int setend_set_hw_mode(bool enable)
 	if (!cpu_supports_mixed_endian_el0())
 		return -EINVAL;
 
+#ifdef CONFIG_IEE
+	if (enable)
+		sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_SED, 0);
+	else
+		sysreg_clear_set_iee_si(sctlr_el1, 0, SCTLR_EL1_SED);
+	return 0;
+#else
 	if (enable)
 		sysreg_clear_set(sctlr_el1, SCTLR_EL1_SED, 0);
 	else
 		sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_SED);
 	return 0;
+#endif
 }
 
 static int __a32_setend_handler(struct pt_regs *regs, u32 big_endian)
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index e997ad275afb..e105f633355a 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -97,6 +97,17 @@ int main(void)
   DEFINE(FREGS_DIRECT_TRAMP,	offsetof(struct ftrace_regs, direct_tramp));
 #endif
   DEFINE(FREGS_SIZE,		sizeof(struct ftrace_regs));
+#ifdef CONFIG_IEE
+  DEFINE(iee_from_token_offset,		offsetof(struct task_token, iee_stack));
+  DEFINE(kernel_from_token_offset,	offsetof(struct task_token, kernel_stack));
+  DEFINE(mm_from_task_offset,		offsetof(struct task_struct, mm));
+#endif
+#ifdef CONFIG_KOI
+  DEFINE(koi_kernel_from_token_offset,	offsetof(struct task_token, koi_kernel_stack));
+  DEFINE(koi_from_token_offset,		offsetof(struct task_token, koi_stack));
+  DEFINE(ttbr1_from_token_offset,	offsetof(struct task_token, current_ttbr1));
+  DEFINE(koi_stack_base_from_token_offset,	offsetof(struct task_token, koi_stack_base));
+#endif
   BLANK();
 #endif
 #ifdef CONFIG_AARCH32_EL0
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 46813132a09f..412006aa323c 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -80,7 +80,11 @@ hisilicon_1980005_enable(const struct arm64_cpu_capabilities *__unused)
 	__set_bit(ARM64_HAS_CACHE_IDC, system_cpucaps);
 	arm64_ftr_reg_ctrel0.sys_val |= BIT(CTR_EL0_IDC_SHIFT);
 	arm64_ftr_reg_ctrel0.strict_mask &= ~BIT(CTR_EL0_IDC_SHIFT);
+#ifdef CONFIG_IEE
+	sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_UCT, 0);
+#else
 	sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0);
+#endif
 }
 #endif
 
@@ -132,7 +136,11 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap)
 		enable_uct_trap = true;
 
 	if (enable_uct_trap)
+#ifdef CONFIG_IEE
+		sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_UCT, 0);
+#else
 		sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0);
+#endif
 }
 
 #ifdef CONFIG_ARM64_ERRATUM_1463225
@@ -147,7 +155,11 @@ has_cortex_a76_erratum_1463225(const struct arm64_cpu_capabilities *entry,
 static void __maybe_unused
 cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused)
 {
+#ifdef CONFIG_IEE
+	sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_UCI, 0);
+#else
 	sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI,
0); +#endif } #ifdef CONFIG_HISILICON_ERRATUM_HIP08_RU_PREFETCH diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 74e445251b51..ebc1e776b175 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -94,6 +94,10 @@ #include <asm/vectors.h> #include <asm/virt.h> +#ifdef CONFIG_IEE +#include <asm/iee-si.h> +#endif + /* Kernel representation of AT_HWCAP and AT_HWCAP2 */ static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly; @@ -1612,7 +1616,11 @@ static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unu * value. */ if (!(read_cpuid_cachetype() & BIT(CTR_EL0_IDC_SHIFT))) +#ifdef CONFIG_IEE + sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_UCT, 0); +#else sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); +#endif } static bool has_cache_dic(const struct arm64_cpu_capabilities *entry, @@ -1873,7 +1881,11 @@ static inline void __cpu_enable_hw_dbm(void) { u64 tcr = read_sysreg(tcr_el1) | TCR_HD; +#ifdef CONFIG_IEE + iee_rwx_gate_entry(IEE_WRITE_tcr_el1, tcr); +#else write_sysreg(tcr, tcr_el1); +#endif isb(); local_flush_tlb_all(); } @@ -2056,7 +2068,9 @@ static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) */ WARN_ON_ONCE(in_interrupt()); + #ifndef CONFIG_IEE sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0); + #endif set_pstate_pan(1); } #endif /* CONFIG_ARM64_PAN */ @@ -2121,7 +2135,11 @@ static bool has_generic_auth(const struct arm64_cpu_capabilities *entry, static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap) { if (this_cpu_has_cap(ARM64_HAS_E0PD)) +#ifdef CONFIG_IEE + sysreg_clear_set_iee_si(tcr_el1, 0, TCR_E0PD1); +#else sysreg_clear_set(tcr_el1, 0, TCR_E0PD1); +#endif } #endif /* CONFIG_ARM64_E0PD */ @@ -2214,7 +2232,11 @@ static void nmi_enable(const struct arm64_cpu_capabilities *__unused) * avoid leaving things masked. 
*/ _allint_clear(); + #ifdef CONFIG_IEE + sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_SPINTMASK, SCTLR_EL1_NMI); + #else sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPINTMASK, SCTLR_EL1_NMI); + #endif isb(); } #endif @@ -2229,7 +2251,11 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused) * So, be strict and forbid other BRs using other registers to * jump onto a PACIxSP instruction: */ +#ifdef CONFIG_IEE + sysreg_clear_set_iee_si(sctlr_el1, 0, SCTLR_EL1_BT0 | SCTLR_EL1_BT1); +#else sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_BT0 | SCTLR_EL1_BT1); +#endif isb(); } #endif /* CONFIG_ARM64_BTI */ @@ -2237,7 +2263,11 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused) #ifdef CONFIG_ARM64_MTE static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) { + #ifdef CONFIG_IEE + sysreg_clear_set_iee_si(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0); + #else sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0); + #endif mte_cpu_setup(); @@ -2271,7 +2301,11 @@ static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, in static void cpu_trap_el0_impdef(const struct arm64_cpu_capabilities *__unused) { + #ifdef CONFIG_IEE + sysreg_clear_set_iee_si(sctlr_el1, 0, SCTLR_EL1_TIDCP); + #else sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_TIDCP); + #endif } static void cpu_enable_dit(const struct arm64_cpu_capabilities *__unused) @@ -2281,7 +2315,11 @@ static void cpu_enable_dit(const struct arm64_cpu_capabilities *__unused) static void cpu_enable_mops(const struct arm64_cpu_capabilities *__unused) { + #ifdef CONFIG_IEE + sysreg_clear_set_iee_si(sctlr_el1, 0, SCTLR_EL1_MSCEn); + #else sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_MSCEn); + #endif } /* Internal helper functions to match cpu capability type */ @@ -3475,6 +3513,43 @@ static void __init setup_system_capabilities(void) enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU); } +#ifdef CONFIG_IEE + +static void iee_si_test_end(void) +{ + pr_info("IEE: testing iee_exec_entry sctlr...\n"); + iee_rwx_gate_entry(IEE_WRITE_SCTLR, read_sysreg(sctlr_el1)& ~SCTLR_ELx_M); + pr_info("IEE: testing iee_exec_entry ttbr0_el1...\n"); + iee_rwx_gate_entry(IEE_WRITE_TTBR0, read_sysreg(ttbr0_el1)); + pr_info("IEE: testing iee_exec_entry vbar...\n"); + iee_rwx_gate_entry(IEE_WRITE_VBAR, read_sysreg(vbar_el1)); + pr_info("IEE: testing iee_exec_entry tcr...\n"); + iee_rwx_gate_entry(IEE_WRITE_TCR, read_sysreg(tcr_el1)); + // pr_info("IEE: testing iee_exec_entry mdscr...\n"); + // iee_rwx_gate_entry(IEE_WRITE_MDSCR, read_sysreg(mdscr_el1)); + // pr_info("IEE: testing iee_exec_entry afsr0...\n"); + // iee_rwx_gate_entry(IEE_WRITE_AFSR0); + #ifdef CONFIG_KOI + write_sysreg(read_sysreg(ttbr0_el1)+0x3000000000000, ttbr0_el1); + pr_info("IEE: current TTBR1_EL1:%llx, TTBR0:%llx\n", read_sysreg(ttbr1_el1), read_sysreg(ttbr0_el1)); + pr_info("IEE: testing iee_exec_entry switch to koi...\n"); + iee_rwx_gate_entry(IEE_SWITCH_TO_KOI, phys_to_ttbr(__pa_symbol(swapper_pg_dir))); + pr_info("IEE: current TTBR1_EL1:%llx, TTBR0:%llx\n", read_sysreg(ttbr1_el1), read_sysreg(ttbr0_el1)); + pr_info("IEE: testing iee_exec_entry switch to kernel...\n"); + iee_rwx_gate_entry(IEE_SWITCH_TO_KERNEL); + #endif +} + +/* Finish iee rwx gate initializations. */ +static void __init iee_si_init_done(void) +{ + // Prepare data for iee rwx gate + iee_si_prepare_data(); + // All initialization is done. Do some simple tests. 
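+	// The calls below exercise one guarded write per register (SCTLR,
+	// TTBR0, VBAR, TCR); a TCR mismatch inside iee_rwx_gate_tramp lands
+	// in iee_bad_mode() instead of silently corrupting state.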
+ iee_si_test_end(); +} +#endif + void __init setup_cpu_features(void) { u32 cwg; @@ -3502,6 +3577,10 @@ void __init setup_cpu_features(void) if (!cwg) pr_warn("No Cache Writeback Granule information, assuming %d\n", ARCH_DMA_MINALIGN); + + #ifdef CONFIG_IEE + iee_si_init_done(); + #endif } static int enable_mismatched_32bit_el0(unsigned int cpu) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 745aefddd9a3..265417e0ad81 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -36,10 +36,14 @@ u8 debug_monitors_arch(void) */ static void mdscr_write(u32 mdscr) { +// #ifdef CONFIG_IEE +// iee_rwx_gate_entry(IEE_WRITE_mdscr_el1, mdscr); +// #else unsigned long flags; flags = local_daif_save(); write_sysreg(mdscr, mdscr_el1); local_daif_restore(flags); +// #endif } NOKPROBE_SYMBOL(mdscr_write); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 08274e4317b2..0d259e355c90 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -156,7 +156,11 @@ asmlinkage void noinstr asm_exit_to_user_mode(struct pt_regs *regs) * mode. Before this function is called it is not safe to call regular kernel * code, instrumentable code, or any code which may trigger an exception. */ +#ifdef CONFIG_IEE +void noinstr arm64_enter_nmi(struct pt_regs *regs) +#else static void noinstr arm64_enter_nmi(struct pt_regs *regs) +#endif { regs->lockdep_hardirqs = lockdep_hardirqs_enabled(); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 7fcbee0f6c0e..dee813ee6aad 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -29,12 +29,391 @@ #include <asm/asm-uaccess.h> #include <asm/unistd.h> +#ifdef CONFIG_IEE +#include <asm/iee-def.h> + +#define BAD_SP_EL0 0 +#define BAD_ELR_EL1 1 +#define BAD_TCR_EL1 2 +#define BAD_IEE_SI 4 +#endif + .macro clear_gp_regs .irp n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29 mov x\n, xzr .endr .endm +#ifdef CONFIG_KOI +#ifdef CONFIG_IEE +/* + * This function is used to switch to ko stack in glue code + */ +SYM_FUNC_START(koi_do_switch_to_ko_stack) + sub sp, sp, #48 + stp x29, x30, [sp] + str x2, [sp, #16] + stp x0, x1, [sp, #32] + + // iee_rw_gate(IEE_WRITE_KERNEL_STACK, current, sp) + mov x0, #IEE_WRITE_KOI_KERNEL_STACK + mrs x1, sp_el0 + add x2, sp, #48 + + bl iee_rw_gate + + // iee_rw_gate(IEE_READ_KOI_STACK, current) + mov x0, #IEE_READ_KOI_STACK + mrs x1, sp_el0 + bl iee_rw_gate + + ldp x29, x30, [sp] + ldr x2, [sp, #16] + add x1, sp, #32 + mov sp, x0 + ldp x0, x1, [x1] + + isb + ret +SYM_FUNC_END(koi_do_switch_to_ko_stack) + +/* + * This fucntion is used to switch to kernel stack in glue code + */ +SYM_FUNC_START(koi_do_switch_to_kernel_stack) + sub sp, sp, #48 + stp x29, x30, [sp] + str x2, [sp, #16] + stp x0, x1, [sp, #32] + // iee_rw_gate(IEE_WRITE_KOI_STACK, current, sp) + mov x0, #IEE_WRITE_KOI_STACK + mrs x1, sp_el0 + add x2, sp, #48 + bl iee_rw_gate + + // iee_rw_gate(IEE_READ_KOI_KERNEL_STACK, current) + mov x0, #IEE_READ_KOI_KERNEL_STACK + mrs x1, sp_el0 + bl iee_rw_gate + + ldp x29, x30, [sp] + ldr x2, [sp, #16] + add x1, sp, #32 + mov sp, x0 + ldp x0, x1, [x1] + isb + ret +SYM_FUNC_END(koi_do_switch_to_kernel_stack) + +/* + * Before switch to ko's pgtable, we must switch current stack to ko's stack. 
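+ * (In this CONFIG_IEE variant the token fields are only reachable through
+ * the _iee_read_koi_stack/_iee_write_koi_kernel_stack helpers, with PAN
+ * cleared around the calls.)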
+ * We have stored registers to kernel stack, and we need to restore them from ko's stack after switching, + * so we need to copy from kernel stack to ko stack + * the memory region to copy is [sp, stack_top) + * void koi_switch_to_ko_stack(void); + */ +SYM_FUNC_START(koi_switch_to_ko_stack) + mrs x17, pan + msr pan, 0x0 + + sub sp, sp, #32 + str x17, [sp, #16] + stp x30, x29, [sp] + + // current sp stores in x1 + add x1, x1, #176 + // current sp_el0 stores in x0 + bl _iee_write_koi_kernel_stack + + mrs x0, sp_el0 + bl _iee_read_koi_stack + + ldr x17, [sp, #16] + ldp x30, x29, [sp] + add sp, sp, #32 + + msr pan, x17 + + sub x0, x0, #176 + mov x1, sp + mov x2, #176 + + // memcpy(current->driver_stack, current->kernel_stack, 176) + mov x16, lr + bl memcpy + mov lr, x16 + + mov sp, x0 + isb + ret +SYM_FUNC_END(koi_switch_to_ko_stack) + +SYM_FUNC_START(koi_switch_to_kernel_stack) + /* + * current sp belongs to driver stack, and the bottom 160 bytes saves registers when exception occurred, + * so we should add 160 to current sp, and store it in task_struct + * also, fetch kernel sp from task_struct, copy the bottom 160 bytes from driver stack to kernel stack + */ + mrs x17, pan + msr pan, 0x0 + + sub sp, sp, #32 + stp x30, x29, [sp] + str x17, [sp, #16] + + mrs x0, sp_el0 + add x1, sp, #192 + bl _iee_write_koi_stack + + mrs x0, sp_el0 + bl _iee_read_koi_kernel_stack + + ldr x17, [sp, #16] + ldp x30, x29, [sp] + add sp, sp, #32 + + msr pan, x17 + + // x0 = kernel_stack + sub x0, x0, #160 + mov x1, sp + // x2 = 160 + mov x2, #160 + + mov x16, lr + bl memcpy + mov lr, x16 + + mov sp, x0 + isb + ret +SYM_FUNC_END(koi_switch_to_kernel_stack) +#else +/* + * This function is used to switch to ko stack in glue code + */ +SYM_FUNC_START(koi_do_switch_to_ko_stack) + sub sp, sp, #16 + stp x16, x17, [sp] + mrs x17, sp_el0 + adrp x16, koi_offset + ldr x16, [x16, #:lo12:koi_offset] + add x17, x17, x16 + add x16, sp, #16 + str x16, [x17, #koi_kernel_from_token_offset] + ldr x16, [x17, #koi_from_token_offset] + mov x17, sp + mov sp, x16 + ldp x16, x17, [x17] + isb + ret +SYM_FUNC_END(koi_do_switch_to_ko_stack) + +/* + * This fucntion is used to switch to kernel stack in glue code + */ +SYM_FUNC_START(koi_do_switch_to_kernel_stack) + sub sp, sp, #16 + stp x16, x17, [sp] + mrs x17, sp_el0 + adrp x16, koi_offset + ldr x16, [x16, #:lo12:koi_offset] + add x17, x17, x16 + add x16, sp, #16 + str x16, [x17, #koi_from_token_offset] + ldr x16, [x17, #koi_kernel_from_token_offset] + mov x17, sp + mov sp, x16 + ldp x16, x17, [x17] + isb + ret +SYM_FUNC_END(koi_do_switch_to_kernel_stack) + +/* + * Before switch to ko's pgtable, we must switch current stack to ko's stack. 
+ * We have stored registers to kernel stack, and we need to restore them from ko's stack after switching, + * so we need to copy from kernel stack to ko stack + * the memory region to copy is [sp, stack_top) + * void koi_switch_to_ko_stack(unsigned long stack_top); + */ +SYM_FUNC_START(koi_switch_to_ko_stack) + // current sp stores in x1 + add x3, x1, #176 + adrp x4, koi_offset + ldr x4, [x4, #:lo12:koi_offset] + add x4, x0, x4 + // current sp_el0 stores in x0 + str x3, [x4, #koi_kernel_from_token_offset] + ldr x0, [x4, #koi_from_token_offset] + sub x0, x0, #176 + mov x2, #176 + + // memcpy(current->driver_stack, current->kernel_stack, 176) + mov x16, lr + bl memcpy + mov lr, x16 + + mov sp, x0 + isb + ret +SYM_FUNC_END(koi_switch_to_ko_stack) + +SYM_FUNC_START(koi_switch_to_kernel_stack) + /* + * current sp belongs to driver stack, and the bottom 176 bytes saves registers when exception occurred, + * so we should add 176 to current sp, and store it in task_struct + * also, fetch kernel sp from task_struct, copy the bottom 176 bytes from driver stack to kernel stack + */ + mov x1, sp + add x3, sp, #160 + + mrs x16, sp_el0 + adrp x2, koi_offset + ldr x2, [x2, #:lo12:koi_offset] + add x16, x16, x2 + str x3, [x16, #koi_from_token_offset] + // sp points to kernel_stack + ldr x0, [x16, #koi_kernel_from_token_offset] + + // x0 = kernel_stack + sub x0, x0, #160 + // x2 = 160 + mov x2, #160 + mov x16, lr + // memcpy(kernel_stack, driver_stack, 160) + bl memcpy + mov lr, x16 + mov sp, x0 + isb + ret +SYM_FUNC_END(koi_switch_to_kernel_stack) +#endif + +SYM_FUNC_START(koi_switch_to_ko_pgtbl) + stp x0, x1, [sp, #16 * 1] + stp x2, x3, [sp, #16 * 2] + stp x4, x5, [sp, #16 * 3] + stp x6, x7, [sp, #16 * 4] + stp x8, x9, [sp, #16 * 5] + stp x10, x11, [sp, #16 * 6] + stp x12, x13, [sp, #16 * 7] + stp x14, x15, [sp, #16 * 8] + stp x16, x17, [sp, #16 * 9] + stp x18, x30, [sp, #16 * 10] + + adrp x0, koi_swapper_ttbr1 + ldr x0, [x0, #:lo12:koi_swapper_ttbr1] + cbz x0, 0f + bl koi_do_switch_to_ko_pgtbl + // if x0 == 0, don't need to switch pgtable and stack, jump to 0 + cbz x0, 0f + mov x19, x0 + // if current on task's kernel stack, switch to ko stack + mrs x0, sp_el0 + mov x1, sp + ldr x2, [x0, TSK_STACK] + eor x2, x2, x1 + and x2, x2, #~(THREAD_SIZE - 1) + cbnz x2, 1f + + bl koi_switch_to_ko_stack +1: +#ifndef CONFIG_IEE + msr ttbr1_el1, x19 + isb + nop + nop + nop +#else + mov x0, #IEE_SWITCH_TO_KOI + mov x1, x19 + bl iee_rwx_gate_entry +#endif +0: + + ldp x0, x1, [sp, #16 * 1] + ldp x2, x3, [sp, #16 * 2] + ldp x4, x5, [sp, #16 * 3] + ldp x6, x7, [sp, #16 * 4] + ldp x8, x9, [sp, #16 * 5] + ldp x10, x11, [sp, #16 * 6] + ldp x12, x13, [sp, #16 * 7] + ldp x14, x15, [sp, #16 * 8] + ldp x16, x17, [sp, #16 * 9] + ldp x18, x30, [sp, #16 * 10] + ret +SYM_FUNC_END(koi_switch_to_ko_pgtbl) + +.pushsection ".koi.text", "ax" +SYM_FUNC_START(koi_switch_to_kernel_pgtbl) + sub sp, sp, #160 + stp x0, x1, [sp, #16 * 0] + stp x2, x3, [sp, #16 * 1] + stp x4, x5, [sp, #16 * 2] + stp x6, x7, [sp, #16 * 3] + stp x8, x9, [sp, #16 * 4] + stp x10, x11, [sp, #16 * 5] + + stp x12, x13, [sp, #16 * 6] + stp x14, x15, [sp, #16 * 7] + stp x16, x17, [sp, #16 * 8] + stp x18, x30, [sp, #16 * 9] + // check whether paging init finished + adrp x0, koi_swapper_ttbr1 + ldr x0, [x0, #:lo12:koi_swapper_ttbr1] + cbz x0, 0f + + bl koi_do_switch_to_kernel_pgtbl + /* + * koi_do_switch_to_kernel_pgtbl return 0 indicates + * that when exception occurred, the isolated ko is executing under koi pgtbl, + * so we need to switch stack to kernel stack after 
switch pgtbl back to koi_swapper_ttbr1. + */ + cbz x0, 0f +#ifndef CONFIG_IEE + mrs x0, sp_el0 + adrp x1, koi_offset + ldr x1, [x1, #:lo12:koi_offset] + add x0, x0, x1 + mov x16, sp + ldr x17, [x0, koi_stack_base_from_token_offset] + eor x17, x17, x16 + and x17, x17, #~(THREAD_SIZE - 1) + cbnz x17, 0f +#else + // save current pan + mrs x17, pan + // disable pan + msr pan, 0x0 + mrs x0, sp_el0 + bl _iee_read_koi_stack_base + // restore pan + msr pan, x17 + + mov x16, sp + eor x0, x0, x16 + and x0, x0, #~(THREAD_SIZE - 1) + cbnz x0, 0f +#endif + bl koi_switch_to_kernel_stack +0: + + ldp x0, x1, [sp, #16 * 0] + ldp x2, x3, [sp, #16 * 1] + ldp x4, x5, [sp, #16 * 2] + ldp x6, x7, [sp, #16 * 3] + ldp x8, x9, [sp, #16 * 4] + ldp x10, x11, [sp, #16 * 5] + ldp x12, x13, [sp, #16 * 6] + ldp x14, x15, [sp, #16 * 7] + ldp x16, x17, [sp, #16 * 8] + ldp x18, x30, [sp, #16 * 9] + add sp, sp, #160 + ret +SYM_FUNC_END(koi_switch_to_kernel_pgtbl) +.popsection +#endif + .macro kernel_ventry, el:req, ht:req, regsize:req, label:req .align 7 .Lventry_start\@: @@ -151,6 +530,17 @@ alternative_else_nop_endif #endif .endm +#ifdef CONFIG_IEE +// SP_EL0 check failed. +SYM_FUNC_START_LOCAL(sp_el0_check_failed) + mov x0, sp + mov x1, #BAD_SP_EL0 + mrs x2, esr_el1 + bl iee_bad_mode + ASM_BUG() +SYM_FUNC_END(sp_el0_check_failed) +#endif + /* Clear the MTE asynchronous tag check faults */ .macro clear_mte_async_tcf thread_sctlr #ifdef CONFIG_ARM64_MTE @@ -224,6 +614,14 @@ alternative_cb_end ldr_this_cpu tsk, __entry_task, x20 msr sp_el0, tsk +#ifdef CONFIG_IEE + // tsk check. + ldr_this_cpu x19, __entry_task, x20 + mrs x20, sp_el0 + cmp x19, x20 + b.ne sp_el0_check_failed +#endif + /* * Ensure MDSCR_EL1.SS is clear, since we can unmask debug exceptions * when scheduling. @@ -276,6 +674,13 @@ alternative_else_nop_endif scs_load_current .else +#ifdef CONFIG_IEE + // tsk check. + ldr_this_cpu x19, __entry_task, x20 + mrs x20, sp_el0 + cmp x19, x20 + b.ne sp_el0_check_failed +#endif add x21, sp, #PT_REGS_SIZE get_current_task tsk .endif /* \el == 0 */ @@ -333,9 +738,11 @@ alternative_else_nop_endif .endm .macro kernel_exit, el + #ifndef CONFIG_IEE .if \el != 0 disable_daif .endif + #endif #ifdef CONFIG_ARM64_PSEUDO_NMI alternative_if_not ARM64_HAS_GIC_PRIO_MASKING @@ -411,6 +818,41 @@ alternative_else_nop_endif msr elr_el1, x21 // set up the return data msr spsr_el1, x22 + +#ifdef CONFIG_IEE + + .if \el == 0 + + #ifndef CONFIG_UNMAP_KERNEL_AT_EL0 + // SET hpd1 = 0 start + mrs x0, tcr_el1 + and x0, x0, #0xFFFFFBFFFFFFFFFF + and x0, x0, #0xFFFFFFFFFFBFFFFF + msr tcr_el1, x0 + // SET hpd1 = 0 end + + disable_daif + + // Check ELR_EL1 + mrs x0, elr_el1 + lsr x0, x0, #48 + tst x0, #0xffff + b.ne 5f + #endif + + .endif + +#else +#ifdef CONFIG_KOI + .if \el==0 + mrs x0, tcr_el1 + and x0, x0, #0xFFFFFFFFFFBFFFFF + msr tcr_el1,x0 + .endif +#endif + +#endif + ldp x0, x1, [sp, #16 * 0] ldp x2, x3, [sp, #16 * 1] ldp x4, x5, [sp, #16 * 2] @@ -569,12 +1011,167 @@ SYM_CODE_START_LOCAL(__bad_stack) SYM_CODE_END(__bad_stack) #endif /* CONFIG_VMAP_STACK */ +/* + * iee exception entry + */ + .macro iee_exception_entry, el + + /* Check whether exception is permmited. 
*/ + ldr x1, =__iee_si_no_irq + cmp x1, x22 + b.hi 1148f + ldr x1, =__iee_si_end + cmp x1, x22 + b.lo 1148f + /* ELR check fail */ + mov x0, sp + mov x1, #BAD_IEE_SI + mrs x2, esr_el1 + bl iee_bad_mode + ASM_BUG() +1148: + + /* el0 set hpds */ + .if \el == 0 + + #ifndef CONFIG_UNMAP_KERNEL_AT_EL0 + /* SET hpd1 = 1 start */ + mrs x0, tcr_el1 + orr x0, x0, #0x0000040000000000 + orr x0, x0, #0x0000000000400000 + msr tcr_el1, x0 + /* SET hpd1 = 1 end */ + + disable_daif + + /* Check TCR_EL1 */ + mrs x0, tcr_el1 + tst x0, #0x0000040000000000 + b.eq 5f + tst x0, #0x0000000000400000 + b.ne 6f + +5: + /* TCR_EL1 check fail */ + mov x0, sp + mov x1, #BAD_TCR_EL1 + mrs x2, esr_el1 + bl iee_bad_mode + ASM_BUG() + +6: + nop + #endif + + .else +#ifdef CONFIG_IEE_INTERRUPTABLE + /* el1 save elr_el1 and set pan */ + /* Check ELR_EL1 */ + ldr x1, =__iee_code_start + cmp x1, x22 + b.hi 7f + ldr x1, =__iee_code_end + cmp x1, x22 + b.lo 7f + /* Exception from iee code */ + /* Switch to kernel stack */ + mrs x0, sp_el0 /* x0 -> task_struct(VA) */ + adrp x2, iee_offset + ldr x2, [x2, #:lo12:iee_offset] + add x1, x0, x2 /* x1 -> task_token(IEE) */ + // store iee stack + mov x3, sp + str x3, [x1, #iee_from_token_offset] + // load kernel stack + ldr x3, [x1, #kernel_from_token_offset] + mov sp, x3 + sub sp, sp, #PT_REGS_SIZE + /* Enable PAN */ + msr pan, #0x1 + +7: + /* Exception from kernel code */ + mov x0, #0x0 + mov x1, #0x0 + mov x2, #0x0 + mov x3, #0x0 +#endif + .endif + .endm + +/* + * iee exception exit + */ + .macro iee_exception_exit, el + // Disable daif + disable_daif + + .if \el == 1 +#ifdef CONFIG_IEE_INTERRUPTABLE + /* el1 pop elr_el1 and set pan */ + /* Check ELR_EL1 */ + ldr x1, =__iee_code_start + cmp x1, x22 + b.hi 9f + ldr x1, =__iee_code_end + cmp x1, x22 + b.lo 9f + /* Eret iee code */ + /* Disable PAN */ + msr pan, #0x0 + /* Switch to iee stack */ + add sp, sp, #PT_REGS_SIZE + mrs x0, sp_el0 /* x0 -> task_struct */ + adrp x2, iee_offset + ldr x2, [x2, #:lo12:iee_offset] + add x1, x0, x2 /* x1 -> task_token(IEE) */ + // store kernel stack + mov x3, sp + str x3, [x1, #kernel_from_token_offset] + // load iee stack + ldr x2, [x1, #iee_from_token_offset] + mov sp, x2 + /* Load ELR_EL1 from iee stack */ + ldr x21, [sp, #S_PC] + /* Check the modify of ELR_EL1 */ + cmp x21, x22 + b.ne 8f + /* ELR_EL1 not modified */ + b 9f + +8: + // ELR_EL1 modified + mov x0, sp + mov x1, #BAD_ELR_EL1 + mrs x2, esr_el1 + bl iee_bad_mode + ASM_BUG() + +9: + // Eret kernel code + mov x0, #0x0 + mov x1, #0x0 + mov x2, #0x0 + mov x3, #0x0 +#endif + .endif + .endm .macro entry_handler el:req, ht:req, regsize:req, label:req SYM_CODE_START_LOCAL(el\el\ht\()_\regsize\()_\label) kernel_entry \el, \regsize + + #ifdef CONFIG_IEE + iee_exception_entry \el + #endif + mov x0, sp bl el\el\ht\()_\regsize\()_\label\()_handler + + #ifdef CONFIG_IEE + iee_exception_exit \el + #endif + .if \el == 0 b ret_to_user .else @@ -844,6 +1441,13 @@ SYM_FUNC_START(cpu_switch_to) ldr lr, [x8] mov sp, x9 msr sp_el0, x1 +#ifdef CONFIG_IEE + // tsk check. + ldr_this_cpu x8, __entry_task, x9 + mrs x9, sp_el0 + cmp x8, x9 + b.ne sp_el0_check_failed +#endif ptrauth_keys_install_kernel x1, x8, x9, x10 scs_save x0 scs_load_current @@ -1033,6 +1637,13 @@ SYM_CODE_START(__sdei_asm_handler) mrs x28, sp_el0 ldr_this_cpu dst=x0, sym=__entry_task, tmp=x1 msr sp_el0, x0 +#ifdef CONFIG_IEE + // tsk check. 
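+	// Verify that SP_EL0 still equals this CPU's __entry_task after the
+	// write above; any mismatch is treated as a corrupted task pointer.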
+ ldr_this_cpu x0, __entry_task, x1 + mrs x1, sp_el0 + cmp x0, x1 + b.ne sp_el0_check_failed +#endif /* If we interrupted the kernel point to the previous stack/frame. */ and x0, x3, #0xc diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 5cdfcc9e3e54..c0af965bd92e 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1309,7 +1309,11 @@ void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) isb(); /* Allow EL0 to access TPIDR2 */ + #ifdef CONFIG_IEE + iee_rwx_gate_entry(IEE_WRITE_sctlr_el1, read_sysreg(SCTLR_EL1) | SCTLR_ELx_ENTP2); + #else write_sysreg(read_sysreg(SCTLR_EL1) | SCTLR_ELx_ENTP2, SCTLR_EL1); + #endif isb(); } diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 6517bf2644a0..5c75e069d877 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -464,6 +464,42 @@ SYM_FUNC_END(create_kernel_mapping) set_this_cpu_offset \tmp1 .endm +#ifdef CONFIG_IEE + .macro init_cpu_task_checked tsk, tmp1, tmp2 + msr sp_el0, \tsk + // tsk check. + adrp x29, __per_cpu_offset + mrs \tmp1, sp_el0 + ldr \tmp2, [\tmp1, #TSK_TI_CPU] /* cpu number */ +1: + cmp \tmp2, #0 + b.eq 2f + add x29, x29, #8 + sub \tmp2, \tmp2, #1 + b 1b +2: + ldr \tmp2, [x29, #:lo12:__per_cpu_offset] /* cpu offset */ + adr_l x29, __entry_task + ldr x29, [x29, \tmp2] + cmp x29, \tmp1 + b.ne sp_el0_check_failed + + ldr \tmp1, [\tsk, #TSK_STACK] + add sp, \tmp1, #THREAD_SIZE + sub sp, sp, #PT_REGS_SIZE + + stp xzr, xzr, [sp, #S_STACKFRAME] + add x29, sp, #S_STACKFRAME + + scs_load_current + + adr_l \tmp1, __per_cpu_offset + ldr w\tmp2, [\tsk, #TSK_TI_CPU] + ldr \tmp1, [\tmp1, \tmp2, lsl #3] + set_this_cpu_offset \tmp1 + .endm +#endif + /* * The following fragment of code is executed with the MMU enabled. * @@ -661,6 +697,18 @@ SYM_FUNC_START_LOCAL(secondary_startup) SYM_FUNC_END(secondary_startup) .text +#ifdef CONFIG_IEE +// SP_EL0 check failed. +SYM_FUNC_START_LOCAL(sp_el0_check_failed) + 1: + nop + nop + nop + nop + b 1f +SYM_FUNC_END(sp_el0_check_failed) +#endif + SYM_FUNC_START_LOCAL(__secondary_switched) mov x0, x20 bl set_cpu_boot_mode_flag @@ -677,7 +725,11 @@ SYM_FUNC_START_LOCAL(__secondary_switched) ldr x2, [x0, #CPU_BOOT_TASK] cbz x2, __secondary_too_slow +#ifdef CONFIG_IEE + init_cpu_task_checked x2, x1, x3 +#else init_cpu_task x2, x1, x3 +#endif #ifdef CONFIG_ARM64_PTR_AUTH ptrauth_keys_init_cpu x2, x3, x4, x5 @@ -746,6 +798,10 @@ SYM_FUNC_START(__enable_mmu) cmp x3, #ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX b.gt __no_granule_support phys_to_ttbr x2, x2 +#ifdef CONFIG_IEE + mov x3, #1 + bfi x2, x3, #48, #16 // ASID 1 is used by IEE rwx gate. +#endif msr ttbr0_el1, x2 // load TTBR0 load_ttbr1 x1, x1, x3 diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 02870beb271e..1c14428a3ed4 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -34,6 +34,10 @@ #include <asm/trans_pgd.h> #include <asm/virt.h> +#ifdef CONFIG_PTP +#include <linux/iee-func.h> +#endif + /* * Hibernate core relies on this value being 0 on resume, and marks it * __nosavedata assuming it will keep the resume kernel's '0' value. 
This @@ -197,12 +201,22 @@ static int create_safe_exec_page(void *src_start, size_t length, phys_addr_t trans_ttbr0; unsigned long t0sz; int rc; + #ifdef CONFIG_PTP + unsigned long iee_addr; + #endif if (!page) return -ENOMEM; memcpy(page, src_start, length); caches_clean_inval_pou((unsigned long)page, (unsigned long)page + length); + + #ifdef CONFIG_PTP + iee_addr = __phys_to_iee(__pa(page_address(page))); + set_iee_page_valid(iee_addr); + iee_set_logical_mem_ro((unsigned long)page_address(page)); + #endif + rc = trans_pgd_idmap_page(&trans_info, &trans_ttbr0, &t0sz, page); if (rc) return rc; diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index d39a8787edf2..b5ac4b7670bc 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -26,6 +26,10 @@ #include <asm/cputype.h> #include <asm/system_misc.h> +#ifdef CONFIG_IEE +#include <asm/iee-si.h> +#endif + /* Breakpoint currently in use for each BRP. */ static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]); @@ -102,13 +106,68 @@ int hw_breakpoint_slots(int type) WRITE_WB_REG_CASE(OFF, 14, REG, VAL); \ WRITE_WB_REG_CASE(OFF, 15, REG, VAL) +#ifdef CONFIG_IEE + +#define IEE_SI_READ_WB_REG_CASE(OFF, N, REG, VAL) \ + case (OFF + N): \ + IEE_SI_AARCH64_DBG_READ(N, REG, VAL); \ + break + +#define IEE_SI_WRITE_WB_REG_CASE(OFF, N, REG, VAL) \ + case (OFF + N): \ + IEE_SI_AARCH64_DBG_WRITE(N, REG, VAL); \ + break + +#define IEE_SI_GEN_READ_REG_CASES(OFF, REG, VAL) \ + IEE_SI_READ_WB_REG_CASE(OFF, 0, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 1, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 2, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 3, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 4, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 5, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 6, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 7, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 8, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 9, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 10, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 11, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 12, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 13, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 14, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 15, REG, VAL) + +#define IEE_SI_GEN_WRITE_REG_CASES(OFF, REG, VAL) \ + IEE_SI_WRITE_WB_REG_CASE(OFF, 0, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 1, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 2, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 3, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 4, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 5, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 6, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 7, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 8, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 9, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 10, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 11, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 12, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 13, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 14, REG, VAL); \ + WRITE_WB_REG_CASE(OFF, 15, REG, VAL) + +#endif + static u64 read_wb_reg(int reg, int n) { u64 val = 0; switch (reg + n) { +// #ifdef CONFIG_IEE +// IEE_SI_GEN_READ_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val); +// IEE_SI_GEN_READ_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val); +// #else GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val); GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val); +// #endif GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val); GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val); default: @@ -122,8 +181,13 @@ NOKPROBE_SYMBOL(read_wb_reg); static 
void write_wb_reg(int reg, int n, u64 val) { switch (reg + n) { +// #ifdef CONFIG_IEE +// IEE_SI_GEN_WRITE_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val); +// IEE_SI_GEN_WRITE_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val); +// #else GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val); GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val); +// #endif GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val); GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val); default: @@ -171,6 +235,10 @@ static int is_a32_compat_bp(struct perf_event *bp) return tsk && is_a32_compat_thread(task_thread_info(tsk)); } +#ifdef CONFIG_IEE +int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw); +#endif + /** * hw_breakpoint_slot_setup - Find and setup a perf slot according to * operations @@ -191,6 +259,37 @@ static int hw_breakpoint_slot_setup(struct perf_event **slots, int max_slots, { int i; struct perf_event **slot; +// reserve hw breakpoint 0 for iee rwx gate in kernel sapce. +// #ifdef CONFIG_IEE +// struct arch_hw_breakpoint *info = counter_arch_bp(bp); +// if (arch_check_bp_in_kernelspace(info)){ +// for (i = 1; i < max_slots; ++i) { // search from hw breakpoint 1 +// slot = &slots[i]; +// switch (ops) { +// case HW_BREAKPOINT_INSTALL: +// if (!*slot) { +// *slot = bp; +// return i; +// } +// break; +// case HW_BREAKPOINT_UNINSTALL: +// if (*slot == bp) { +// *slot = NULL; +// return i; +// } +// break; +// case HW_BREAKPOINT_RESTORE: +// if (*slot == bp) +// return i; +// break; +// default: +// pr_warn_once("Unhandled hw breakpoint ops %d\n", ops); +// return -EINVAL; +// } +// } +// return -ENOSPC; +// } +// #endif for (i = 0; i < max_slots; ++i) { slot = &slots[i]; diff --git a/arch/arm64/kernel/iee/Makefile b/arch/arm64/kernel/iee/Makefile new file mode 100644 index 000000000000..123c68c5cc4e --- /dev/null +++ b/arch/arm64/kernel/iee/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_IEE) += iee.o iee-gate.o iee-func.o \ No newline at end of file diff --git a/arch/arm64/kernel/iee/iee-func.c b/arch/arm64/kernel/iee/iee-func.c new file mode 100644 index 000000000000..7764dbd41555 --- /dev/null +++ b/arch/arm64/kernel/iee/iee-func.c @@ -0,0 +1,187 @@ +#include "asm/pgtable.h" +#include <linux/memory.h> +#include <linux/mm.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> +#include <asm/pgalloc.h> + +void set_iee_page_valid(unsigned long addr) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + + p4d_t *p4dp = p4d_offset(pgdp, addr); + + pud_t *pudp = pud_offset(p4dp, addr); + + pmd_t *pmdp = pmd_offset(pudp, addr); + + pte_t *ptep = pte_offset_kernel(pmdp, addr); + pte_t pte = READ_ONCE(*ptep); + + if((addr < (PAGE_OFFSET + IEE_OFFSET)) | (addr > (PAGE_OFFSET + BIT(vabits_actual - 1)))) + return; + + pte = __pte(pte_val(pte) | 0x1); + set_pte(ptep, pte); + flush_tlb_kernel_range(addr, addr+PAGE_SIZE); + isb(); +} + +void set_iee_page_invalid(unsigned long addr) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + + p4d_t *p4dp = p4d_offset(pgdp, addr); + + pud_t *pudp = pud_offset(p4dp, addr); + + pmd_t *pmdp = pmd_offset(pudp, addr); + + pte_t *ptep = pte_offset_kernel(pmdp, addr); + pte_t pte = READ_ONCE(*ptep); + + if((addr < (PAGE_OFFSET + IEE_OFFSET)) | (addr > (PAGE_OFFSET + BIT(vabits_actual - 1)))) + return; + + pte = __pte(pte_val(pte) & ~0x1); + set_pte(ptep, pte); + flush_tlb_kernel_range(addr, addr+PAGE_SIZE); + isb(); 
+} + +void iee_set_logical_mem_ro(unsigned long addr) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + + p4d_t *p4dp = p4d_offset(pgdp, addr); + + pud_t *pudp = pud_offset(p4dp, addr); + + pmd_t *pmdp = pmd_offset(pudp, addr); + + pte_t *ptep = pte_offset_kernel(pmdp, addr); + pte_t pte = READ_ONCE(*ptep); + + if(addr < PAGE_OFFSET) + return; + + pte = __pte((pte_val(pte) | PTE_RDONLY) & ~PTE_DBM); + set_pte(ptep, pte); + flush_tlb_kernel_range(addr, addr+PAGE_SIZE); + isb(); +} + +void iee_set_logical_mem_rw(unsigned long addr) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + + p4d_t *p4dp = p4d_offset(pgdp, addr); + + pud_t *pudp = pud_offset(p4dp, addr); + + pmd_t *pmdp = pmd_offset(pudp, addr); + + pte_t *ptep = pte_offset_kernel(pmdp, addr); + pte_t pte = READ_ONCE(*ptep); + + if((addr < PAGE_OFFSET) | (addr > (PAGE_OFFSET + BIT(vabits_actual - 2)))) + return; + + pte = __pte(pte_val(pte) | PTE_DBM); + set_pte(ptep, pte); + flush_tlb_kernel_range(addr, addr+PAGE_SIZE); + isb(); +} + +void iee_set_token_page_valid(void *token, void *new) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, (unsigned long)token); + + p4d_t *p4dp = p4d_offset(pgdp, (unsigned long)token); + + pud_t *pudp = pud_offset(p4dp, (unsigned long)token); + + pmd_t *pmdp = pmd_offset(pudp, (unsigned long)token); + + pte_t *ptep = pte_offset_kernel(pmdp, (unsigned long)token); + pte_t pte = READ_ONCE(*ptep); + pte = __pte(((pte_val(pte) | 0x1) & ~PTE_ADDR_MASK) | __phys_to_pte_val(__pa(new))); + set_pte(ptep, pte); + flush_tlb_kernel_range((unsigned long)token, (unsigned long)(token+PAGE_SIZE)); + isb(); +} + +void iee_set_token_page_invalid(void *token) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, (unsigned long)token); + + p4d_t *p4dp = p4d_offset(pgdp, (unsigned long)token); + + pud_t *pudp = pud_offset(p4dp, (unsigned long)token); + + pmd_t *pmdp = pmd_offset(pudp, (unsigned long)token); + + pte_t *ptep = pte_offset_kernel(pmdp, (unsigned long)token); + pte_t pte = READ_ONCE(*ptep); + pte = __pte(((pte_val(pte) & ~((unsigned long)0x1)) & ~PTE_ADDR_MASK) | __phys_to_pte_val(__pa(token - IEE_OFFSET))); + set_pte(ptep, pte); + flush_tlb_kernel_range((unsigned long)token, (unsigned long)(token+PAGE_SIZE)); + isb(); +} + +void iee_set_kernel_ppage(unsigned long addr) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + + p4d_t *p4dp = p4d_offset(pgdp, addr); + + pud_t *pudp = pud_offset(p4dp, addr); + + pmd_t *pmdp = pmd_offset(pudp, addr); + + pte_t *ptep = pte_offset_kernel(pmdp, addr); + + int i; + for(i = 0; i < 4; i++) + { + pte_t pte = READ_ONCE(*ptep); + pte = __pte(pte_val(pte) & ~PTE_USER & ~PTE_NG); + iee_set_pte_ppage(ptep, pte); + ptep++; + } + flush_tlb_kernel_range(addr, addr+4*PAGE_SIZE); + isb(); +} + +void iee_set_kernel_upage(unsigned long addr) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + + p4d_t *p4dp = p4d_offset(pgdp, addr); + p4d_t p4d = READ_ONCE(*p4dp); + + pud_t *pudp = pud_offset(p4dp, addr); + + pmd_t *pmdp = pmd_offset(pudp, addr); + + pte_t *ptep = pte_offset_kernel(pmdp, addr); + + int i; + for(i = 0; i < 4; i++) + { + pte_t pte = READ_ONCE(*ptep); + pte = __pte(pte_val(pte) | PTE_USER | PTE_NG); + iee_set_pte_upage(ptep, pte); + ptep++; + } + flush_tlb_kernel_range(addr, addr+4*PAGE_SIZE); + isb(); +} \ No newline at end of file diff --git a/arch/arm64/kernel/iee/iee-gate.S 
b/arch/arm64/kernel/iee/iee-gate.S
new file mode 100644
index 000000000000..6de99a018bde
--- /dev/null
+++ b/arch/arm64/kernel/iee/iee-gate.S
@@ -0,0 +1,174 @@
+#include <asm/asm-offsets.h>
+#include <linux/linkage.h>
+#include <asm/bug.h>
+#include <asm-generic/export.h>
+
+#ifdef CONFIG_IEE
+
+SYM_FUNC_START(iee_rw_gate)
+	/* save DAIF, then mask IRQs */
+	mrs x13, daif
+	msr daifset, #0x2
+	isb
+	/* save lr */
+	sub sp, sp, #16
+	stp x29, x30, [sp]
+	bl iee_protected_rw_gate
+	/* restore lr */
+	ldp x29, x30, [sp]
+	add sp, sp, #16
+	/* restore daif */
+	msr daif, x13
+	ret
+SYM_FUNC_END(iee_rw_gate)
+#if defined(CONFIG_CREDP) || defined(CONFIG_KOI)
+EXPORT_SYMBOL(iee_rw_gate)
+#endif
+
+	.pushsection ".iee.text.header", "ax"
+
+SYM_FUNC_START(iee_protected_rw_gate)
+	mrs x9, pan
+	/* disable PAN */
+	msr pan, #0x0
+	/* switch to iee stack */
+	mrs x9, sp_el0			/* x9 -> task_struct */
+	adrp x12, iee_offset
+	ldr x12, [x12, #:lo12:iee_offset]
+	add x11, x9, x12		/* x11 -> task_token(IEE) */
+	// store kernel stack
+	mov x10, sp
+	str x10, [x11, #kernel_from_token_offset]
+	// load iee stack
+	ldr x10, [x11, #iee_from_token_offset]
+	mov sp, x10
+#ifdef CONFIG_IEE_INTERRUPTABLE
+	isb
+	/* restore daif */
+	msr daif, x13
+	sub sp, sp, #16
+	stp x29, x30, [sp]
+#else
+	sub sp, sp, #16
+	stp x13, x30, [sp]
+#endif
+	/* call iee func */
+	bl iee_dispatch
+#ifdef CONFIG_IEE_INTERRUPTABLE
+	ldp x29, x30, [sp]
+	add sp, sp, #16
+	/* store and disable daif */
+	mrs x13, daif
+	msr daifset, #0x2
+	isb
+#else
+	ldp x13, x30, [sp]
+	add sp, sp, #16
+#endif
+	/* switch to kernel stack */
+	mrs x9, sp_el0			/* x9 -> task_struct(VA) */
+	adrp x12, iee_offset
+	ldr x12, [x12, #:lo12:iee_offset]
+	add x11, x9, x12		/* x11 -> task_token(IEE) */
+	// store iee stack
+	mov x10, sp
+	str x10, [x11, #iee_from_token_offset]
+	// load kernel stack
+	ldr x10, [x11, #kernel_from_token_offset]
+	mov sp, x10
+	/* enable PAN */
+	msr pan, #0x1
+	ret
+SYM_FUNC_END(iee_protected_rw_gate)
+
+	.popsection
+
+#include <asm/asm-bug.h>
+#define BAD_IEE 4
+#define BAD_IEE_SI 5
+
+#define SYS_TCR_EL1_HPD1 0x40000000000
+#define SYS_TCR_EL1_A1 0x400000
+
+	.pushsection ".iee.exec_entry", "ax"
+
+SYM_FUNC_START(iee_rwx_gate_entry)
+	/* Disable IRQs first. */
+	mrs x15, daif			// use x15 to restore daif
+	msr DAIFSet, #0xf
+	isb
+
+	/* Set HPD1 = 0 so the following code in the U RWX page can execute */
+	mrs x9, tcr_el1
+	bic x9, x9, #SYS_TCR_EL1_HPD1
+	bic x9, x9, #SYS_TCR_EL1_A1
+	msr tcr_el1, x9
+	isb
+
+	b iee_rwx_gate_tramp
+SYM_FUNC_END(iee_rwx_gate_entry)
+	.popsection
+
+	.pushsection ".iee.si_text", "awx"
+
+SYM_FUNC_START(iee_rwx_gate_tramp)
+	/* Check tcr val. */
+	mrs x10, tcr_el1
+	adrp x12, iee_si_tcr		// TCR value must stay constant after init
+	ldr x12, [x12, #:lo12:iee_si_tcr]
+	cbz x12, 1f
+	cmp x12, x10
+	b.ne 3f
+1:
+	mov x13, sp
+	/* If iee hasn't been initialized, skip stack switch. */
+	ldr x11, =iee_init_done
+	ldr x10, [x11]
+	cbz x10, 2f
+
+	/* Switch to iee stack */
+	mrs x9, sp_el0			// x9 -> task_struct
+	adrp x12, iee_offset
+	ldr x12, [x12, #:lo12:iee_offset]
+	add x11, x9, x12		// x11 -> task_token(IEE)
+	// load iee stack
+	ldr x10, [x11, #iee_from_token_offset]
+	mov sp, x10
+
+	/* x15 stores daif and x13 stores previous sp */
+2:
+	stp x15, x13, [sp, #-32]!
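+	// Saved DAIF (x15) and the pre-switch SP (x13) sit at the bottom of
+	// this 32-byte frame; iee_rwx_gate_exit pops both to restore them.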
+ stp x29, x30, [sp, #16] + bl iee_si_handler // enter actual handler + ldp x29, x30, [sp, #16] + + b iee_rwx_gate_exit // jump to iee exit +3: + mov x0, sp + mov x1, #BAD_IEE_SI + mrs x2, esr_el1 + bl iee_bad_mode + ASM_BUG() +SYM_FUNC_END(iee_rwx_gate_tramp) + + .popsection + + .pushsection ".iee.exec_exit", "ax" + +SYM_FUNC_START(iee_rwx_gate_exit) + ldp x15, x13, [sp], #32 + mov sp, x13 // switch to kernel stack + mrs x9, tcr_el1 + orr x9, x9, #SYS_TCR_EL1_HPD1 + orr x9, x9, #SYS_TCR_EL1_A1 + msr tcr_el1, x9 +/* --------Page boundary-------- */ + isb + msr daif, x15 + isb + ret +SYM_FUNC_END(iee_rwx_gate_exit) + + .popsection + +#endif diff --git a/arch/arm64/kernel/iee/iee.c b/arch/arm64/kernel/iee/iee.c new file mode 100644 index 000000000000..6b9f7d40df67 --- /dev/null +++ b/arch/arm64/kernel/iee/iee.c @@ -0,0 +1,1360 @@ +#include "linux/sched.h" +#include <linux/stdarg.h> +#include <asm/pgtable-types.h> +#include <asm/iee.h> +#include <asm/iee-si.h> +#include <asm/sysreg.h> +#include <linux/pgtable.h> +#include <linux/cred.h> +#include <asm/iee-slab.h> +#include <asm/percpu.h> + +#ifdef CONFIG_IEE +extern struct cred init_cred; +extern s64 memstart_addr; + +void __iee_code _iee_set_swapper_pgd(pgd_t *pgdp, pgd_t pgd); +void __iee_code _iee_set_tramp_pgd_pre_init(pgd_t *pgdp, pgd_t pgd); +void __iee_code _iee_set_pte(pte_t *ptep, pte_t pte); +void __iee_code _iee_set_pmd(pmd_t *pmdp, pmd_t pmd); +void __iee_code _iee_set_pud(pud_t *pudp, pud_t pud); +void __iee_code _iee_set_p4d(p4d_t *p4dp, p4d_t p4d); +void __iee_code _iee_set_bm_pte(pte_t *ptep, pte_t pte); +pteval_t __iee_code _iee_set_xchg_relaxed(pte_t *ptep, pteval_t pteval); +pteval_t __iee_code _iee_set_cmpxchg_relaxed(pte_t *ptep, pteval_t old_pteval, pteval_t new_pteval); +void __iee_code _iee_write_in_byte(void *ptr, __u64 data, int length); +void __iee_code _iee_set_cred_uid(struct cred *cred, kuid_t uid); +void __iee_code _iee_set_cred_gid(struct cred *cred, kgid_t gid); +void __iee_code _iee_copy_cred(struct cred *old, struct cred *new); +void __iee_code _iee_set_cred_suid(struct cred *cred, kuid_t suid); +void __iee_code _iee_set_cred_sgid(struct cred *cred, kgid_t sgid); +void __iee_code _iee_set_cred_euid(struct cred *cred, kuid_t euid); +void __iee_code _iee_set_cred_egid(struct cred *cred, kgid_t egid); +void __iee_code _iee_set_cred_fsuid(struct cred *cred, kuid_t fsuid); +void __iee_code _iee_set_cred_fsgid(struct cred *cred, kgid_t fsgid); +void __iee_code _iee_set_cred_user(struct cred *cred, struct user_struct *user); +void __iee_code _iee_set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns); +void __iee_code _iee_set_cred_ucounts(struct cred *cred, struct ucounts *ucounts); +void __iee_code _iee_set_cred_group_info(struct cred *cred, struct group_info *group_info); +void __iee_code _iee_set_cred_securebits(struct cred *cred, unsigned securebits); +void __iee_code _iee_set_cred_cap_inheritable(struct cred *cred, kernel_cap_t cap_inheritable); +void __iee_code _iee_set_cred_cap_permitted(struct cred *cred, kernel_cap_t cap_permitted); +void __iee_code _iee_set_cred_cap_effective(struct cred *cred, kernel_cap_t cap_effective); +void __iee_code _iee_set_cred_cap_bset(struct cred *cred, kernel_cap_t cap_bset); +void __iee_code _iee_set_cred_cap_ambient(struct cred *cred, kernel_cap_t cap_ambient); +void __iee_code _iee_set_cred_jit_keyring(struct cred *cred, unsigned char jit_keyring); +void __iee_code _iee_set_cred_session_keyring(struct cred *cred, struct key *session_keyring); +void __iee_code 
_iee_set_cred_process_keyring(struct cred *cred, struct key *process_keyring); +void __iee_code _iee_set_cred_thread_keyring(struct cred *cred, struct key *thread_keyring); +void __iee_code _iee_set_cred_request_key_auth(struct cred *cred, struct key *request_key_auth); +void __iee_code _iee_set_cred_non_rcu(struct cred *cred, int non_rcu); +void __iee_code _iee_set_cred_atomic_set_usage(struct cred *cred, int i); +bool __iee_code _iee_set_cred_atomic_op_usage(struct cred *cred, int flag, int nr); +void __iee_code _iee_set_cred_security(struct cred *cred, void *security); +void __iee_code _iee_set_cred_rcu(struct cred *cred, struct rcu_head *rcu); +void __iee_code _iee_memset(void *ptr, int data, size_t n); +void __iee_code _iee_set_track(struct track *ptr, struct track *data); +void __iee_code _iee_set_freeptr(freeptr_t *pptr, freeptr_t ptr); +void __iee_code _iee_set_pte_upage(pte_t *ptep, pte_t pte); +void __iee_code _iee_set_pte_ppage(pte_t *ptep, pte_t pte); +void __iee_code _iee_set_token_mm(struct task_struct *tsk, struct mm_struct *mm); +void __iee_code _iee_set_token_pgd(struct task_struct *tsk, pgd_t *pgd); +void __iee_code _iee_init_token(struct task_struct *tsk, void *kernel_stack, void *iee_stack); +void __iee_code _iee_free_token(struct task_struct *tsk); +unsigned long __iee_code _iee_read_token_stack(struct task_struct *tsk); +void __iee_code _iee_write_entry_task(struct task_struct *tsk); +#ifdef CONFIG_KOI +unsigned long __iee_code _iee_read_koi_stack(struct task_struct *tsk); +void __iee_code _iee_write_koi_stack(struct task_struct *tsk, unsigned long koi_stack); +unsigned long __iee_code _iee_read_token_ttbr1(struct task_struct *tsk); +void __iee_code _iee_write_token_ttbr1(struct task_struct *tsk, unsigned long current_ttbr1); +unsigned long __iee_code _iee_read_koi_kernel_stack(struct task_struct *tsk); +void __iee_code _iee_write_koi_kernel_stack(struct task_struct *tsk, unsigned long kernel_stack); +unsigned long __iee_code _iee_read_koi_stack_base(struct task_struct *tsk); +void __iee_code _iee_write_koi_stack_base(struct task_struct *tsk, unsigned long koi_stack_base); +#endif + +/* wrapper functions */ +void __iee_code iee_wrapper_write_in_byte(va_list args) { + void *ptr = va_arg(args, void *); + __u64 data = va_arg(args, __u64); + int length = va_arg(args, int); + _iee_write_in_byte(ptr, data, length); +} + +void __iee_code iee_wrapper_set_pte(va_list args) { + pte_t *ptep = va_arg(args, pte_t *); + pte_t pte = va_arg(args, pte_t); + _iee_set_pte(ptep, pte); +} + +void __iee_code iee_wrapper_set_pmd(va_list args) { + pmd_t *pmdp = va_arg(args, pmd_t *); + pmd_t pmd = va_arg(args, pmd_t); + _iee_set_pmd(pmdp, pmd); +} + +void __iee_code iee_wrapper_set_p4d(va_list args) { + p4d_t *p4dp = va_arg(args, p4d_t *); + p4d_t p4d = va_arg(args, p4d_t); + _iee_set_p4d(p4dp, p4d); +} + +void __iee_code iee_wrapper_set_pud(va_list args) { + pud_t *pudp = va_arg(args, pud_t *); + pud_t pud = va_arg(args, pud_t); + _iee_set_pud(pudp, pud); +} + +void __iee_code iee_wrapper_set_bm_pte(va_list args) { + pte_t *ptep = va_arg(args, pte_t *); + pte_t pte = va_arg(args, pte_t); + _iee_set_bm_pte(ptep, pte); +} + +void __iee_code iee_wrapper_set_swapper_pgd(va_list args) { + pgd_t *pgdp = va_arg(args, pgd_t *); + pgd_t pgd = va_arg(args, pgd_t); + _iee_set_swapper_pgd(pgdp, pgd); +} + +void __iee_code iee_wrapper_set_tramp_pgd(va_list args) { + pgd_t *pgdp = va_arg(args, pgd_t *); + pgd_t pgd = va_arg(args, pgd_t); + _iee_set_tramp_pgd_pre_init(pgdp, pgd); +} + +pteval_t __iee_code 
iee_wrapper_set_xchg(va_list args) { + pteval_t ret; + pte_t *ptep = va_arg(args, pte_t *); + pteval_t pteval = va_arg(args, pteval_t); + ret = _iee_set_xchg_relaxed(ptep, pteval); + return (u64)ret; +} + +pteval_t __iee_code iee_wrapper_set_cmpxchg(va_list args) { + pteval_t ret; + pte_t *ptep = va_arg(args, pte_t *); + pteval_t old_pteval = va_arg(args, pteval_t); + pteval_t new_pteval = va_arg(args, pteval_t); + ret = _iee_set_cmpxchg_relaxed(ptep, old_pteval, new_pteval); + return (u64)ret; +} + +void __iee_code iee_wrapper_set_cred_uid(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + kuid_t uid = va_arg(args, kuid_t); + _iee_set_cred_uid(cred, uid); +} + +void __iee_code iee_wrapper_set_cred_gid(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + kgid_t gid = va_arg(args, kgid_t); + _iee_set_cred_gid(cred, gid); +} + +void __iee_code iee_wrapper_copy_cred(va_list args) { + struct cred *old = va_arg(args, struct cred *); + struct cred *new = va_arg(args, struct cred *); + _iee_copy_cred(old, new); +} + +void __iee_code iee_wrapper_set_cred_suid(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + kuid_t suid = va_arg(args, kuid_t); + _iee_set_cred_suid(cred, suid); +} + +void __iee_code iee_wrapper_set_cred_sgid(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + kgid_t sgid = va_arg(args, kgid_t); + _iee_set_cred_sgid(cred, sgid); +} + +void __iee_code iee_wrapper_set_cred_euid(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + kuid_t euid = va_arg(args, kuid_t); + _iee_set_cred_euid(cred, euid); +} + +void __iee_code iee_wrapper_set_cred_egid(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + kgid_t egid = va_arg(args, kgid_t); + _iee_set_cred_egid(cred, egid); +} + +void __iee_code iee_wrapper_set_cred_fsuid(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + kuid_t fsuid = va_arg(args, kuid_t); + _iee_set_cred_fsuid(cred, fsuid); +} + +void __iee_code iee_wrapper_set_cred_fsgid(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + kgid_t fsgid = va_arg(args, kgid_t); + _iee_set_cred_fsgid(cred, fsgid); +} + +void __iee_code iee_wrapper_set_cred_user(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + struct user_struct *user = va_arg(args, struct user_struct *); + _iee_set_cred_user(cred, user); +} + +void __iee_code iee_wrapper_set_cred_user_ns(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + struct user_namespace *user_ns = va_arg(args, struct user_namespace *); + _iee_set_cred_user_ns(cred, user_ns); +} + +void __iee_code iee_wrapper_set_cred_ucounts(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + struct ucounts *ucounts = va_arg(args, struct ucounts *); + _iee_set_cred_ucounts(cred, ucounts); +} + +void __iee_code iee_wrapper_set_cred_group_info(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + struct group_info *group_info = va_arg(args, struct group_info *); + _iee_set_cred_group_info(cred, group_info); +} + +void __iee_code iee_wrapper_set_cred_securebits(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + unsigned securebits = va_arg(args, unsigned); + _iee_set_cred_securebits(cred, securebits); +} + +void __iee_code iee_wrapper_set_cred_cap_inheritable(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + kernel_cap_t cap_inheritable = va_arg(args, kernel_cap_t); + _iee_set_cred_cap_inheritable(cred, cap_inheritable); +} + +void 
__iee_code iee_wrapper_set_cred_cap_permitted(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + kernel_cap_t cap_permitted = va_arg(args, kernel_cap_t); + _iee_set_cred_cap_permitted(cred, cap_permitted); +} + +void __iee_code iee_wrapper_set_cred_cap_effective(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + kernel_cap_t cap_effective = va_arg(args, kernel_cap_t); + _iee_set_cred_cap_effective(cred, cap_effective); +} + +void __iee_code iee_wrapper_set_cred_cap_bset(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + kernel_cap_t cap_bset = va_arg(args, kernel_cap_t); + _iee_set_cred_cap_bset(cred, cap_bset); +} + +void __iee_code iee_wrapper_set_cred_cap_ambient(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + kernel_cap_t cap_ambient = va_arg(args, kernel_cap_t); + _iee_set_cred_cap_ambient(cred, cap_ambient); +} + +void __iee_code iee_wrapper_set_cred_jit_keyring(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + unsigned long jit_keyring = va_arg(args, unsigned long); + _iee_set_cred_jit_keyring(cred, (unsigned char)jit_keyring); +} + +void __iee_code iee_wrapper_set_cred_session_keyring(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + struct key *session_keyring = va_arg(args, struct key *); + _iee_set_cred_session_keyring(cred, session_keyring); +} + +void __iee_code iee_wrapper_set_cred_process_keyring(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + struct key *process_keyring = va_arg(args, struct key *); + _iee_set_cred_process_keyring(cred, process_keyring); +} + +void __iee_code iee_wrapper_set_cred_thread_keyring(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + struct key *thread_keyring = va_arg(args, struct key *); + _iee_set_cred_thread_keyring(cred, thread_keyring); +} + +void __iee_code iee_wrapper_set_cred_request_key_auth(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + struct key *request_key_auth = va_arg(args, struct key *); + _iee_set_cred_request_key_auth(cred, request_key_auth); +} + +void __iee_code iee_wrapper_set_cred_non_rcu(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + int non_rcu = va_arg(args, int); + _iee_set_cred_non_rcu(cred, non_rcu); +} + +void __iee_code iee_wrapper_set_cred_atomic_set_usage(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + int i = va_arg(args, int); + _iee_set_cred_atomic_set_usage(cred, i); +} + +u64 __iee_code iee_wrapper_set_cred_atomic_op_usage(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + int flag = va_arg(args, int); + int nr = va_arg(args, int); + return (u64)_iee_set_cred_atomic_op_usage(cred, flag, nr); +} + +void __iee_code iee_wrapper_set_cred_security(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + void *security = va_arg(args, void *); + _iee_set_cred_security(cred, security); +} + +void __iee_code iee_wrapper_set_cred_rcu(va_list args) { + struct cred *cred = va_arg(args, struct cred *); + struct rcu_head *rcu = va_arg(args, struct rcu_head *); + _iee_set_cred_rcu(cred, rcu); +} + +void __iee_code iee_wrapper_memset(va_list args) { + void *ptr = va_arg(args, void *); + int data = va_arg(args, int); + size_t n = va_arg(args, size_t); + _iee_memset(ptr, data, n); +} + +void __iee_code iee_wrapper_set_track(va_list args) { + struct track *ptr = va_arg(args, struct track *); + struct track *data = va_arg(args, struct track *); + _iee_set_track(ptr, data); +} + +void 
__iee_code iee_wrapper_set_freeptr(va_list args) { + freeptr_t *pptr = va_arg(args, freeptr_t *); + freeptr_t ptr = va_arg(args, freeptr_t); + _iee_set_freeptr(pptr, ptr); +} + +void __iee_code iee_wrapper_set_pte_upage(va_list args) { + pte_t *ptep = va_arg(args, pte_t *); + pte_t pte = va_arg(args, pte_t); + _iee_set_pte_upage(ptep, pte); +} + +void __iee_code iee_wrapper_set_pte_ppage(va_list args) { + pte_t *ptep = va_arg(args, pte_t *); + pte_t pte = va_arg(args, pte_t); + _iee_set_pte_ppage(ptep, pte); +} + +void __iee_code iee_wrapper_set_token_mm(va_list args) { + struct task_struct *tsk = va_arg(args, struct task_struct *); + struct mm_struct *mm = va_arg(args, struct mm_struct *); + _iee_set_token_mm(tsk, mm); +} + +void __iee_code iee_wrapper_set_token_pgd(va_list args) { + struct task_struct *tsk = va_arg(args, struct task_struct *); + pgd_t *pgd = va_arg(args, pgd_t *); + _iee_set_token_pgd(tsk, pgd); +} + +void __iee_code iee_wrapper_init_token(va_list args) { + struct task_struct *tsk = va_arg(args, struct task_struct *); + void *kernel_stack = va_arg(args, void *); + void *iee_stack = va_arg(args, void *); + _iee_init_token(tsk, kernel_stack, iee_stack); +} + +void __iee_code iee_wrapper_free_token(va_list args) { + struct task_struct *tsk = va_arg(args, struct task_struct *); + _iee_free_token(tsk); +} + +u64 __iee_code iee_wrapper_read_token_stack(va_list args) { + struct task_struct *tsk = va_arg(args, struct task_struct *); + return (u64)_iee_read_token_stack(tsk); +} + +void __iee_code iee_wrapper_write_entry_task(va_list args) { + struct task_struct *tsk = va_arg(args, struct task_struct *); + _iee_write_entry_task(tsk); +} + +#ifdef CONFIG_KOI +u64 __iee_code iee_wrapper_read_koi_stack(va_list args) { + struct task_struct *tsk = va_arg(args, struct task_struct *); + return (u64)_iee_read_koi_stack(tsk); +} + +void __iee_code iee_wrapper_write_koi_stack(va_list args) { + struct task_struct *tsk = va_arg(args, struct task_struct *); + unsigned long koi_stack = va_arg(args, unsigned long); + _iee_write_koi_stack(tsk, koi_stack); +} + +u64 __iee_code iee_wrapper_read_token_ttbr1(va_list args) { + struct task_struct *tsk = va_arg(args, struct task_struct *); + return (u64)_iee_read_token_ttbr1(tsk); +} + +void __iee_code iee_wrapper_write_token_ttbr1(va_list args) { + struct task_struct *tsk = va_arg(args, struct task_struct *); + unsigned long current_ttbr1 = va_arg(args, unsigned long); + _iee_write_token_ttbr1(tsk, current_ttbr1); +} + +u64 __iee_code iee_wrapper_read_koi_kernel_stack(va_list args) { + struct task_struct *tsk = va_arg(args, struct task_struct *); + return (u64)_iee_read_koi_kernel_stack(tsk); +} + +void __iee_code iee_wrapper_write_koi_kernel_stack(va_list args) { + struct task_struct *tsk = va_arg(args, struct task_struct *); + unsigned long kernel_stack = va_arg(args, unsigned long); + _iee_write_koi_kernel_stack(tsk, kernel_stack); +} + +u64 __iee_code iee_wrapper_read_koi_stack_base(va_list args) { + struct task_struct *tsk = va_arg(args, struct task_struct *); + return (u64)_iee_read_koi_stack_base(tsk); +} + +void __iee_code iee_wrapper_write_koi_stack_base(va_list args) { + struct task_struct *tsk = va_arg(args, struct task_struct *); + unsigned long koi_stack_base = va_arg(args, unsigned long); + _iee_write_koi_stack_base(tsk, koi_stack_base); +} +#endif +// Define the function pointer type for wrapper functions. 
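+// Usage sketch (hypothetical call sites; both flags appear in the
+// iee_dispatch() switch below):
+//     iee_dispatch(IEE_WRITE_IN_BYTE, ptr, data, 8);     // void path
+//     old = iee_dispatch(IEE_OP_SET_XCHG, ptep, pteval); // returns u64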
+// Each function pointer conforms to a standardized calling convention +// using a variable argument list (va_list) as its parameter. +// This allows dynamic invocation of different functions with various arguments. +typedef void (*iee_wrapper_func)(va_list args); +iee_wrapper_func iee_wrappers[] = { + iee_wrapper_write_in_byte, + iee_wrapper_set_pte, + iee_wrapper_set_pmd, + iee_wrapper_set_pud, + iee_wrapper_set_p4d, + iee_wrapper_set_bm_pte, + iee_wrapper_set_swapper_pgd, + iee_wrapper_set_tramp_pgd, + (iee_wrapper_func)iee_wrapper_set_cmpxchg, + (iee_wrapper_func)iee_wrapper_set_xchg, + iee_wrapper_copy_cred, + iee_wrapper_set_cred_uid, + iee_wrapper_set_cred_gid, + iee_wrapper_set_cred_suid, + iee_wrapper_set_cred_sgid, + iee_wrapper_set_cred_euid, + iee_wrapper_set_cred_egid, + iee_wrapper_set_cred_fsuid, + iee_wrapper_set_cred_fsgid, + iee_wrapper_set_cred_user, + iee_wrapper_set_cred_user_ns, + iee_wrapper_set_cred_group_info, + iee_wrapper_set_cred_securebits, + iee_wrapper_set_cred_cap_inheritable, + iee_wrapper_set_cred_cap_permitted, + iee_wrapper_set_cred_cap_effective, + iee_wrapper_set_cred_cap_bset, + iee_wrapper_set_cred_cap_ambient, + iee_wrapper_set_cred_jit_keyring, + iee_wrapper_set_cred_session_keyring, + iee_wrapper_set_cred_process_keyring, + iee_wrapper_set_cred_thread_keyring, + iee_wrapper_set_cred_request_key_auth, + iee_wrapper_set_cred_non_rcu, + iee_wrapper_set_cred_atomic_set_usage, + (iee_wrapper_func)iee_wrapper_set_cred_atomic_op_usage, + iee_wrapper_set_cred_security, + iee_wrapper_set_cred_rcu, + iee_wrapper_memset, + iee_wrapper_set_track, + iee_wrapper_set_freeptr, + iee_wrapper_set_pte_upage, + iee_wrapper_set_pte_ppage, + iee_wrapper_set_token_mm, + iee_wrapper_set_token_pgd, + iee_wrapper_init_token, + iee_wrapper_free_token, + (iee_wrapper_func)iee_wrapper_read_token_stack, + iee_wrapper_write_entry_task, + iee_wrapper_set_cred_ucounts, +#ifdef CONFIG_KOI + (iee_wrapper_func)iee_wrapper_read_koi_stack, + iee_wrapper_write_koi_stack, + (iee_wrapper_func)iee_wrapper_read_token_ttbr1, + iee_wrapper_write_token_ttbr1, + (iee_wrapper_func)iee_wrapper_read_koi_kernel_stack, + iee_wrapper_write_koi_kernel_stack, + (iee_wrapper_func)iee_wrapper_read_koi_stack_base, + iee_wrapper_write_koi_stack_base +#endif +}; + +u64 __iee_code iee_dispatch(int flag, ...){ + va_list pArgs; + + va_start(pArgs, flag); + + switch(flag) + { + case IEE_OP_SET_CMPXCHG: + { + pteval_t ret = iee_wrapper_set_cmpxchg(pArgs); + va_end(pArgs); + return (u64)ret; + } + case IEE_OP_SET_XCHG: + { + pteval_t ret = iee_wrapper_set_xchg(pArgs); + va_end(pArgs); + return (u64)ret; + } + case IEE_OP_SET_CRED_ATOP_USAGE: + { + u64 ret = iee_wrapper_set_cred_atomic_op_usage(pArgs); + va_end(pArgs); + return ret; + } + case IEE_READ_TOKEN_STACK: + { + u64 ret = iee_wrapper_read_token_stack(pArgs); + va_end(pArgs); + return ret; + } +#ifdef CONFIG_KOI + case IEE_READ_KOI_STACK: + { + u64 ret = iee_wrapper_read_koi_stack(pArgs); + va_end(pArgs); + return ret; + } + case IEE_READ_TOKEN_TTBR1: + { + u64 ret = iee_wrapper_read_token_ttbr1(pArgs); + va_end(pArgs); + return ret; + } + case IEE_READ_KOI_KERNEL_STACK: + { + u64 ret = iee_wrapper_read_koi_kernel_stack(pArgs); + va_end(pArgs); + return ret; + } + case IEE_READ_KOI_STACK_BASE: + { + u64 ret = iee_wrapper_read_koi_stack_base(pArgs); + va_end(pArgs); + return ret; + } +#endif + default: + { + #ifndef CONFIG_KOI + if((flag < IEE_WRITE_IN_BYTE) | (flag > IEE_OP_SET_CRED_UCOUNTS)) + panic("Invalid iee flag.\n"); + #else + if((flag < 
IEE_WRITE_IN_BYTE) | (flag > IEE_WRITE_KOI_STACK_BASE)) + panic("Invalid iee flag.\n"); + #endif + iee_wrappers[flag](pArgs); + break; + } + } + + va_end(pArgs); + return 0; +} + +#ifdef CONFIG_KOI +unsigned long __iee_code _iee_read_koi_stack(struct task_struct *tsk) +{ + struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); + return (unsigned long)token->koi_stack; +} + +void __iee_code _iee_write_koi_stack(struct task_struct *tsk, unsigned long koi_stack) +{ + struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); + token->koi_stack = koi_stack; +} + +unsigned long __iee_code _iee_read_token_ttbr1(struct task_struct *tsk) +{ + struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); + return token->current_ttbr1; +} + +void __iee_code _iee_write_token_ttbr1(struct task_struct *tsk, unsigned long current_ttbr1) +{ + struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); + token->current_ttbr1 = current_ttbr1; +} + +unsigned long __iee_code _iee_read_koi_kernel_stack(struct task_struct *tsk) +{ + struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); + return token->koi_kernel_stack; +} + +void __iee_code _iee_write_koi_kernel_stack(struct task_struct *tsk, unsigned long kernel_stack) +{ + struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); + token->koi_kernel_stack = kernel_stack; +} + +unsigned long __iee_code _iee_read_koi_stack_base(struct task_struct *tsk) +{ + struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); + return (unsigned long)token->koi_stack_base; +} + +void __iee_code _iee_write_koi_stack_base(struct task_struct *tsk, unsigned long koi_stack_base) +{ + struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); + token->koi_stack_base = koi_stack_base; +} +#endif + +// Protect the __entry_task. +__attribute__((aligned(PAGE_SIZE))) DECLARE_PER_CPU(struct task_struct *[PAGE_SIZE/sizeof(struct task_struct *)], __entry_task); +void __iee_code _iee_write_entry_task(struct task_struct *tsk) +{ + // Add check of tsk. + struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); + + unsigned long flags; + unsigned long res; + struct task_struct **entry_addr; + local_irq_save(flags); + asm volatile("at s1e1r, %0"::"r"(token)); + isb(); + res = read_sysreg(par_el1); + local_irq_restore(flags); + + // If it is logical map, that means it is not a token. 
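+ // (AT S1E1R performs a stage-1 EL1 read translation of the given VA;
+ // PAR_EL1 then holds the translation result used by the check below.)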
+ if(__phys_to_iee(res & PTE_ADDR_MASK) == (((unsigned long)token) & PTE_ADDR_MASK)) + panic("Trying to forge a token.\n"); + + if(!token->valid) + panic("Trying to write a wrong task into __entry_task.\n"); + entry_addr = (struct task_struct **)__phys_to_iee(__pa(SHIFT_PERCPU_PTR(__entry_task,__kern_my_cpu_offset()))); + *entry_addr = tsk; +} + +unsigned long __iee_code _iee_read_token_stack(struct task_struct *tsk) +{ + struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); + return (unsigned long)token->iee_stack; +} + +void __iee_code _iee_free_token(struct task_struct *tsk) +{ + _iee_memset(tsk, 0, sizeof(struct task_token)); +} + +#ifdef CONFIG_KOI +extern unsigned long koi_swapper_ttbr1; +#endif +void __iee_code _iee_init_token(struct task_struct *tsk, void *kernel_stack, void *iee_stack) +{ + struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); + token->kernel_stack = kernel_stack; + token->iee_stack = iee_stack; + token->valid = true; +#ifdef CONFIG_KOI + token->koi_kernel_stack = NULL; + token->koi_stack = NULL; + token->koi_stack_base = NULL; + token->current_ttbr1 = 0; +#endif +} + +void __iee_code _iee_set_token_mm(struct task_struct *tsk, struct mm_struct *mm) +{ + struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); + token->mm = mm; +} + +void __iee_code _iee_set_token_pgd(struct task_struct *tsk, pgd_t *pgd) +{ + struct task_token *token = (struct task_token *)__phys_to_iee(__pa(tsk)); + token->pgd = pgd; +} + +void __iee_code _iee_set_freeptr(freeptr_t *pptr, freeptr_t ptr) +{ + pptr = (freeptr_t *)__phys_to_iee(__pa(pptr)); + *pptr = ptr; +} + +#pragma GCC push_options +#pragma GCC optimize("O0") +void __iee_code _iee_memset(void *ptr, int data, size_t n) +{ + char *_ptr = (char *)__phys_to_iee(__pa(ptr)); + + while (n--) + *_ptr++ = data; +} + +void __iee_code _iee_memcpy(void *dst, void *src, size_t n) +{ + char *_dst = (char *)__phys_to_iee(__pa(dst)); + char *_src = (char *)src; + + while(n--) + *_dst++ = *_src++; +} +#pragma GCC pop_options + +void __iee_code _iee_set_track(struct track *ptr, struct track *data) +{ + _iee_memcpy(ptr, data, sizeof(struct track)); +} + +void __iee_code _iee_set_cred_rcu(struct cred *cred, struct rcu_head *rcu) +{ + if(cred == &init_cred) + cred = (struct cred *)__phys_to_iee(__pa_symbol(cred)); + else + cred = (struct cred *)__phys_to_iee(__pa(cred)); + #ifdef CONFIG_CREDP + *((struct rcu_head **)(&(cred->rcu.func))) = rcu; + #endif +} + +void __iee_code _iee_set_cred_security(struct cred *cred, void *security) +{ + if(cred == &init_cred) + cred = (struct cred *)__phys_to_iee(__pa_symbol(cred)); + else + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->security = security; +} + +bool __iee_code _iee_set_cred_atomic_op_usage(struct cred *cred, int flag, int nr) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + switch (flag) + { + case AT_ADD: { + atomic_long_add(nr, &cred->usage); + return 0; + } + case AT_INC_NOT_ZERO: { + return atomic_long_inc_not_zero(&cred->usage); + } + case AT_SUB_AND_TEST: { + return atomic_long_sub_and_test(nr, &cred->usage); + } + } + return 0; +} + +void __iee_code _iee_set_cred_atomic_set_usage(struct cred *cred, int i) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + atomic_long_set(&cred->usage,i); +} + +void __iee_code _iee_set_cred_non_rcu(struct cred *cred, int non_rcu) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->non_rcu = non_rcu; +} + +void __iee_code _iee_set_cred_session_keyring(struct cred *cred, 
struct key *session_keyring) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->session_keyring = session_keyring; +} + +void __iee_code _iee_set_cred_process_keyring(struct cred *cred, struct key *process_keyring) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->process_keyring = process_keyring; +} + +void __iee_code _iee_set_cred_thread_keyring(struct cred *cred, struct key *thread_keyring) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->thread_keyring = thread_keyring; +} + +void __iee_code _iee_set_cred_request_key_auth(struct cred *cred, struct key *request_key_auth) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->request_key_auth = request_key_auth; +} + +void __iee_code _iee_set_cred_jit_keyring(struct cred *cred, unsigned char jit_keyring) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->jit_keyring = jit_keyring; +} + +void __iee_code _iee_set_cred_cap_inheritable(struct cred *cred, kernel_cap_t cap_inheritable) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->cap_inheritable = cap_inheritable; +} + +void __iee_code _iee_set_cred_cap_permitted(struct cred *cred, kernel_cap_t cap_permitted) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->cap_permitted = cap_permitted; +} + +void __iee_code _iee_set_cred_cap_effective(struct cred *cred, kernel_cap_t cap_effective) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->cap_effective = cap_effective; +} + +void __iee_code _iee_set_cred_cap_bset(struct cred *cred, kernel_cap_t cap_bset) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->cap_bset = cap_bset; +} + +void __iee_code _iee_set_cred_cap_ambient(struct cred *cred, kernel_cap_t cap_ambient) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->cap_ambient = cap_ambient; +} + +void __iee_code _iee_set_cred_securebits(struct cred *cred, unsigned securebits) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->securebits = securebits; +} + +void __iee_code _iee_set_cred_group_info(struct cred *cred, struct group_info *group_info) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->group_info = group_info; +} + +void __iee_code _iee_set_cred_ucounts(struct cred *cred, struct ucounts *ucounts) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->ucounts = ucounts; +} + +void __iee_code _iee_set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->user_ns = user_ns; +} + +void __iee_code _iee_set_cred_user(struct cred *cred, struct user_struct *user) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->user = user; +} + +void __iee_code _iee_set_cred_fsgid(struct cred *cred, kgid_t fsgid) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->fsgid = fsgid; +} + +void __iee_code _iee_set_cred_fsuid(struct cred *cred, kuid_t fsuid) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->fsuid = fsuid; +} + +void __iee_code _iee_set_cred_egid(struct cred *cred, kgid_t egid) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->egid = egid; +} + +void __iee_code _iee_set_cred_euid(struct cred *cred, kuid_t euid) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->euid = euid; +} + +void __iee_code _iee_set_cred_sgid(struct cred *cred, kgid_t sgid) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->sgid = sgid; +} + +void __iee_code _iee_set_cred_suid(struct cred *cred, kuid_t suid) +{ + cred = (struct cred 
*)__phys_to_iee(__pa(cred)); + cred->suid = suid; +} + +void __iee_code _iee_copy_cred(struct cred *old, struct cred *new) +{ + #ifdef CONFIG_CREDP + struct rcu_head *rcu = (struct rcu_head *)(new->rcu.func); + struct cred *_new = (struct cred *)__phys_to_iee(__pa(new)); + _iee_memcpy(new,old,sizeof(struct cred)); + *(struct rcu_head **)(&(_new->rcu.func)) = rcu; + *(struct rcu_head *)(_new->rcu.func) = *(struct rcu_head *)(old->rcu.func); + #endif +} + +void __iee_code _iee_set_cred_gid(struct cred *cred, kgid_t gid) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->gid = gid; +} + +void __iee_code _iee_set_cred_uid(struct cred *cred, kuid_t uid) +{ + cred = (struct cred *)__phys_to_iee(__pa(cred)); + cred->uid = uid; +} + +void __iee_code _iee_write_in_byte(void *ptr, __u64 data, int length) +{ + ptr = (void *)__phys_to_iee(__pa(ptr)); + switch(length) { + case 8: { + *(__u64 *)ptr = data; + break; + } + case 4: { + *(__u32 *)ptr = (__u32)data; + break; + } + case 2: { + *(__u16 *)ptr = (__u16)data; + break; + } + case 1: { + *(__u8 *)ptr = (__u8)data; + break; + } + } +} + +pteval_t __iee_code _iee_set_xchg_relaxed(pte_t *ptep, pteval_t pteval) +{ + pteval_t ret = xchg_relaxed((pteval_t *)(__phys_to_iee(__pa(ptep))), pteval); + return ret; +} + +pteval_t __iee_code _iee_set_cmpxchg_relaxed(pte_t *ptep, pteval_t old_pteval, pteval_t new_pteval) +{ + pteval_t pteval = cmpxchg_relaxed((pteval_t *)(__phys_to_iee(__pa(ptep))), old_pteval, new_pteval); + return pteval; +} + +/* Check if addr is allocated in IEE page */ +static inline bool check_addr_in_iee_valid(unsigned long addr) +{ + unsigned long flags; + unsigned long res; + local_irq_save(flags); + asm volatile("at s1e1r, %0"::"r"(addr)); + isb(); + res = read_sysreg(par_el1); + local_irq_restore(flags); + + // If it is not logical map, that means it is a token. 
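+ // (PAR_EL1 bit 0 is the fault flag F: 0 means the translation
+ // succeeded, so "!(res & 0x1)" below reports a live IEE mapping.)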
+ if(__phys_to_iee(res & PTE_ADDR_MASK) != addr)
+ return false;
+
+ return !(res & 0x1);
+}
+
+void __iee_code _iee_set_tramp_pgd_pre_init(pgd_t *pgdp, pgd_t pgd)
+{
+ WRITE_ONCE(*((pgd_t *)(__phys_to_iee(__pa_symbol(pgdp)))), pgd);
+}
+
+void __iee_code _iee_set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ if(!(pgd_val(pgd) & PMD_SECT_VALID))
+ {
+ WRITE_ONCE(*((pgd_t *)(__phys_to_iee(__pa_symbol(pgdp)))), pgd);
+ return;
+ }
+
+ if ((pgd_val(pgd) & PMD_TABLE_BIT) && !check_addr_in_iee_valid(__phys_to_iee(__pgd_to_phys(pgd))))
+ panic("You can't use non-iee-pgtable\n");
+
+ if((pgdp >= pgd_offset_pgd((pgd_t *)swapper_pg_dir, PAGE_OFFSET + BIT(vabits_actual - 2))) && (pgdp < pgd_offset_pgd((pgd_t *)swapper_pg_dir, PAGE_OFFSET + BIT(vabits_actual - 1))) && !(pgd_val(pgd) & PGD_APT))
+ panic("Set IEE pgd U page.\n");
+
+ WRITE_ONCE(*((pgd_t *)(__phys_to_iee(__pa_symbol(pgdp)))), pgd);
+}
+
+void __iee_code _iee_set_p4d(p4d_t *p4dp, p4d_t p4d)
+{
+ if(!(p4d_val(p4d) & PMD_SECT_VALID))
+ {
+ WRITE_ONCE(*((p4d_t *)(__phys_to_iee(__pa(p4dp)))), p4d);
+ return;
+ }
+
+ if ((p4d_val(p4d) & PMD_TABLE_BIT) && !check_addr_in_iee_valid(__phys_to_iee(__p4d_to_phys(p4d))))
+ panic("You can't use non-iee-pgtable\n");
+
+ WRITE_ONCE(*((p4d_t *)(__phys_to_iee(__pa(p4dp)))), p4d);
+}
+
+void __iee_code _iee_set_pud(pud_t *pudp, pud_t pud)
+{
+ if(!(pud_val(pud) & PMD_SECT_VALID))
+ {
+ WRITE_ONCE(*((pud_t *)(__phys_to_iee(__pa(pudp)))), pud);
+ return;
+ }
+
+ if ((pud_val(pud) & PMD_TABLE_BIT) && !check_addr_in_iee_valid(__phys_to_iee(__pud_to_phys(pud))))
+ panic("You can't use non-iee-pgtable\n");
+
+ WRITE_ONCE(*((pud_t *)(__phys_to_iee(__pa(pudp)))), pud);
+}
+
+// Return true if the modification does not break DEP.
+static inline bool check_pmd_dep(char *addr, pmd_t pmd)
+{
+ // DEP for kernel code and readonly data
+ // (the range actually checked below is [_stext, _etext))
+ if (addr >= _stext && addr < _etext)
+ {
+ if ((PTE_WRITE & pmd_val(pmd)) || // DBM == 1 --> writable
+ !(PTE_RDONLY & pmd_val(pmd))) // DBM == 0 && AP[2] == 0 --> writable
+ {
+ panic("Can't make kernel's text/readonly page writable!\n"
+ "addr = 0x%16llx, pmd_val = 0x%16llx",
+ (u64)addr, pmd_val(pmd));
+ }
+ }
+ return true;
+}
+
+// Return true if the pmd table is a part of the kernel page table.
+// TODO : Optimize to get lower overhead.
+static inline bool is_kernel_pmd_table(pmd_t *pmdp, pmd_t pmd)
+{
+ int i = 0, j = 0;
+ for(i = 0; i < PAGE_SIZE/sizeof(pgd_t); i++)
+ {
+ pgd_t *pgdp = (pgd_t *)swapper_pg_dir + i;
+ if((pgd_val(*pgdp) & PMD_SECT_VALID) && (pgd_val(*pgdp) & PMD_TABLE_BIT))
+ {
+ for(j = 0; j < PAGE_SIZE/sizeof(pud_t); j++)
+ {
+ pud_t *pudp = (pud_t *)__va(__pgd_to_phys(*pgdp)) + j; /* index the pud array with j, not the outer pgd index */
+ if((pud_val(*pudp) & PMD_SECT_VALID) && (pud_val(*pudp) & PMD_TABLE_BIT))
+ {
+ pmd_t *current_pmdp = __va(__pud_to_phys(*pudp));
+ if((unsigned long)current_pmdp == ((unsigned long)pmdp & PAGE_MASK))
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
+// Return true if it is mapped to a physical range containing an IEE page.
+// TODO : Optimize to get lower overhead.
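+// (With 4K pages this probes PAGE_SIZE/sizeof(pmd_t) == 512 page-sized
+// steps, i.e. the whole 2MiB span that a PMD block maps.)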
+static inline bool check_addr_range_in_iee_valid(pmd_t pmd)
+{
+ int i = 0;
+ unsigned long addr = __phys_to_iee(__pmd_to_phys(pmd));
+ for(i = 0; i < PAGE_SIZE/sizeof(pmd_t); i++)
+ {
+ if(check_addr_in_iee_valid(addr + PAGE_SIZE * i))
+ return true;
+ }
+ return false;
+}
+
+void __iee_code _iee_set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+ char *addr = (char *)__phys_to_kimg(__pmd_to_phys(pmd));
+
+ if(!(pmd_val(pmd) & PMD_SECT_VALID))
+ {
+ WRITE_ONCE(*((pmd_t *)(__phys_to_iee(__pa(pmdp)))), pmd);
+ return;
+ }
+
+ // Check if the pte table is legally allocated.
+ if ((pmd_val(pmd) & PMD_TABLE_BIT) && !check_addr_in_iee_valid(__phys_to_iee(__pmd_to_phys(pmd))))
+ panic("You can't use non-iee-pgtable\n");
+
+ // Avoid mapping a huge pmd as U page.
+ if(!(pmd_val(pmd) & PMD_TABLE_BIT) && (pmd_val(pmd) & PMD_SECT_USER) && is_kernel_pmd_table(pmdp, pmd))
+ panic("Set a block descriptor in kernel space U page.\n");
+
+ // Avoid mapping a huge pmd to an IEE physical page.
+ if(!(pmd_val(pmd) & PMD_TABLE_BIT) && check_addr_range_in_iee_valid(pmd))
+ panic("Mapping IEE physical page to a huge pmd.\n");
+
+ if(!check_pmd_dep(addr, pmd))
+ return;
+
+ WRITE_ONCE(*((pmd_t *)(__phys_to_iee(__pa(pmdp)))), pmd);
+}
+
+// Return true if the pte table is a part of the kernel page table.
+// TODO : Optimize to get lower overhead.
+static inline bool is_kernel_pte_table(pte_t *ptep, pte_t pte)
+{
+ return false;
+}
+
+// Return true if it does not change the privilege or add a new U page in the kernel.
+static inline bool check_privilege_safe(pte_t *ptep, pte_t pte)
+{
+ if(!(pte_val(pte) & PTE_VALID))
+ return true;
+
+ if((pte_val(*ptep) & PTE_VALID))
+ {
+ if((pte_val(*ptep) & PTE_USER) != (pte_val(pte) & PTE_USER))
+ panic("Incorrectly changing privilege.\n");
+ }
+ else
+ {
+ if((pte_val(pte) & PTE_USER) && is_kernel_pte_table(ptep, pte))
+ panic("Add new U page in kernel space.\n");
+ }
+ return true;
+}
+
+// TODO : When adding a new executable page, check it for DEP.
+static inline bool safely_adding_new_exec_page(pte_t *ptep, pte_t pte)
+{
+ return true;
+}
+
+// Return true if it is only changing the prot of a pte.
+static inline bool is_changing_pte_prot(pte_t *ptep, pte_t pte)
+{
+ return ((pte_val(*ptep) ^ pte_val(pte)) & PTE_ADDR_MASK) == 0;
+}
+
+// Return true if the modification does not break DEP.
+static inline bool check_pte_dep(char *addr, pte_t pte)
+{
+ // DEP for kernel code and readonly data
+ // (the range actually checked below is [_stext, _etext))
+ if (addr >= _stext && addr < _etext)
+ {
+ if ((PTE_WRITE & pte_val(pte)) // DBM == 1 --> writable
+ || !(PTE_RDONLY & pte_val(pte))) // DBM == 0 && AP[2] == 0 --> writable
+ {
+ panic("Can't make kernel's text/readonly page writable!\n"
+ "addr = 0x%16llx, pte_val = 0x%16llx",
+ (u64)addr, pte_val(pte));
+ }
+ }
+ return true;
+}
+
+void __iee_code _iee_set_pte(pte_t *ptep, pte_t pte)
+{
+ char *addr = (char *)__phys_to_kimg(__pte_to_phys(pte));
+
+ if(!(pte_val(pte) & PTE_VALID))
+ {
+ WRITE_ONCE(*((pte_t *)(__phys_to_iee(__pa(ptep)))), pte);
+ return;
+ }
+
+ // Avoid modifying privilege unsafely.
+ if(!check_privilege_safe(ptep, pte))
+ panic("You are modifying privilege unsafely.\n");
+
+ // Avoid mapping a new executable page.
+ if(!safely_adding_new_exec_page(ptep, pte))
+ panic("You are adding a new executable page unsafely.\n");
+
+ // Avoid mapping a new VA to IEE PA.
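+ // (Prot-only updates keep the PA unchanged, so only writes that point
+ // the PTE at a different physical page need the IEE-ownership probe.)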
+ if(!is_changing_pte_prot(ptep, pte) &&
+ check_addr_in_iee_valid(__phys_to_iee(__pte_to_phys(pte))))
+ panic("You are remapping an IEE page to another VA.\n");
+
+ // Avoid mapping a writable VA to kernel code PA.
+ if(!check_pte_dep(addr, pte))
+ return;
+
+ WRITE_ONCE(*((pte_t *)(__phys_to_iee(__pa(ptep)))), pte);
+}
+
+// Return true if it only sets the U page and modifies NG.
+static inline bool is_setting_upage(pte_t *ptep, pte_t pte)
+{
+ if(((pte_val(*ptep) ^ pte_val(pte)) & ~(PTE_USER | PTE_NG)) != 0)
+ panic("Incorrectly setting U page.\n");
+ if((pte_val(pte) & PTE_USER) != PTE_USER)
+ panic("Using the wrong interface to set P page.\n");
+ return true;
+}
+
+void __iee_code _iee_set_pte_upage(pte_t *ptep, pte_t pte)
+{
+ // Check that it only changes the prot.
+ if(!is_setting_upage(ptep, pte))
+ panic("Incorrectly setting U page.\n");
+
+ WRITE_ONCE(*((pte_t *)(__phys_to_iee(__pa(ptep)))), pte);
+}
+
+// Return true if it only sets the P page and modifies NG.
+static inline bool is_setting_ppage(pte_t *ptep, pte_t pte)
+{
+ if(((pte_val(*ptep) ^ pte_val(pte)) & ~(PTE_USER | PTE_NG)) != 0)
+ panic("Incorrectly setting P page.\n");
+ if((pte_val(pte) & PTE_USER) != 0)
+ panic("Using the wrong interface to set U page.\n");
+ return true;
+}
+
+void __iee_code _iee_set_pte_ppage(pte_t *ptep, pte_t pte)
+{
+ // Check that it only changes the prot.
+ if(!is_setting_ppage(ptep, pte))
+ panic("Incorrectly setting P page.\n");
+
+ WRITE_ONCE(*((pte_t *)(__phys_to_iee(__pa(ptep)))), pte);
+}
+
+void __iee_code _iee_set_bm_pte(pte_t *ptep, pte_t pte)
+{
+ WRITE_ONCE(*((pte_t *)(__phys_to_iee(__pa_symbol(ptep)))), pte);
+}
+
+/* Data in iee_si_base is visible to all pgds while iee_si_data is private. */
+unsigned long iee_base_idmap_pg_dir __iee_si_data;
+unsigned long iee_base_reserved_pg_dir __iee_si_data;
+unsigned long iee_base__bp_harden_el1_vectors __iee_si_data;
+bool iee_init_done __iee_si_data;
+unsigned long iee_si_tcr __iee_si_data;
+
+static inline u64 __iee_si_code iee_si_mask(unsigned long mask, unsigned long new_val, unsigned long old_val)
+{
+ return (new_val & mask) | (old_val & ~mask);
+}
+/*
+ * Handler function for requests to execute sensitive instructions.
+ */
+u64 __iee_si_code iee_si_handler(int flag, ...)
+{
+ va_list pArgs;
+ u64 old_val, new_val;
+
+ // BUG_ON(flag > IEE_WRITE_MDSCR);
+ va_start(pArgs, flag);
+ switch (flag) {
+ case IEE_SI_TEST:
+ break;
+ case IEE_WRITE_SCTLR: {
+ old_val = read_sysreg(sctlr_el1);
+ new_val = va_arg(pArgs, u64);
+ new_val = iee_si_mask(IEE_SCTLR_MASK, new_val, old_val);
+ write_sysreg(new_val, sctlr_el1);
+ break;
+ }
+ case IEE_WRITE_TTBR0:
+ case IEE_CONTEXT_SWITCH: {
+ u64 new_asid, new_phys, old_phys, token_phys;
+ struct task_struct *tsk;
+ struct task_token *token;
+ new_val = va_arg(pArgs, u64);
+ new_phys = (new_val & PAGE_MASK) & ~TTBR_ASID_MASK;
+ new_asid = new_val >> 48;
+
+ // Check the ASID first.
+ if (new_phys == iee_base_reserved_pg_dir){
+ if (new_asid != 1)
+ panic("IEE SI warning: reserved_pg_dir ASID invalid: %llx:%llx", new_asid, new_val);
+ }
+ // ASID 1 is already reserved for the iee rwx gate.
+ else if (new_asid == 0){
+ new_val |= FIELD_PREP(TTBR_ASID_MASK, 1);
+ printk("IEE SI: Modify ASID of %llx to 1.", new_val);
+ }
+ // TODO: operations to protect idmap_pg_dir
+ else if (new_phys == iee_base_idmap_pg_dir)
+ {
+ // printk("IEE SI: switch to idmap_pg_dir.");
+ }
+ else if (new_asid % 2 == 0)
+ panic("IEE SI warning: TTBR0 ASID invalid: %llx:%llx", new_asid, new_val);
+
+ /* Skip verification if iee hasn't been initialized.
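+ * (Task tokens are populated only once IEE setup completes, so the
+ * sp_el0-based TTBR0 consistency check below is meaningful only after
+ * iee_init_done is set.)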
*/ + if (iee_init_done){ + // Verify current sp_el0 with iee token info + asm volatile("mrs %x0, sp_el0":"=r"(tsk)); + token = (struct task_token *)__phys_to_iee(__pa(tsk)); + + /* + * token->pgd != NULL means it is a user task, then we need to check whether current ttbr0 is correct. + */ + if (token->pgd){ + old_val = read_sysreg(ttbr0_el1); + // When TTBR0 is reserved_pg_dir then no checking is available. + if (old_val != iee_base_reserved_pg_dir){ + old_phys = (old_val & PAGE_MASK) & ~TTBR_ASID_MASK; + token_phys = __pa(token->pgd); + if (old_phys != token_phys) + panic("IEE SI warning: Pgd set error. old ttbr0:%lx, token ttbr0:%lx, token pgd:%lx", + (unsigned long)old_phys, (unsigned long)token_phys, (unsigned long)(token->pgd)); + } + } + } + // all checks are done. + write_sysreg(new_val, ttbr0_el1); + + // SET ASID in TTBR1 when context switch + if (flag == IEE_CONTEXT_SWITCH){ + new_val = (read_sysreg(ttbr1_el1) & ~TTBR_ASID_MASK) | FIELD_PREP(TTBR_ASID_MASK, new_asid-1); + write_sysreg(new_val, ttbr1_el1); + } + break; + } + case IEE_WRITE_VBAR: { + u64 el1_vector; + new_val = va_arg(pArgs, u64); + el1_vector = iee_base__bp_harden_el1_vectors; + if(new_val == el1_vector || new_val == el1_vector+SZ_2K || + new_val == el1_vector+SZ_2K*2 || new_val == el1_vector+SZ_2K*3) + write_sysreg(new_val, vbar_el1); + break; + } + case IEE_WRITE_TCR: { + old_val = read_sysreg(tcr_el1); + new_val = va_arg(pArgs, u64); + new_val = iee_si_mask(IEE_TCR_MASK, new_val, old_val); + write_sysreg(new_val, tcr_el1); + break; + } + case IEE_WRITE_MDSCR: { + old_val = read_sysreg(mdscr_el1); + new_val = va_arg(pArgs, u64); + new_val = iee_si_mask(IEE_MDSCR_MASK, new_val, old_val); + write_sysreg(new_val, mdscr_el1); + break; + } + } + va_end(pArgs); + return 0; +} +/* + * TODO: scan a page to check whether it contains sensitive instructions + * return 1 when finding sensitive inst, 0 on safe page. 
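+ * (Candidate encodings are the MSR forms mediated by iee_si_handler()
+ * above: writes to SCTLR_EL1, TTBR0_EL1, VBAR_EL1, TCR_EL1 and MDSCR_EL1.)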
+ */ +int iee_si_scan_page(unsigned long addr); +#endif \ No newline at end of file diff --git a/arch/arm64/kernel/koi/Makefile b/arch/arm64/kernel/koi/Makefile new file mode 100644 index 000000000000..9be8710b714a --- /dev/null +++ b/arch/arm64/kernel/koi/Makefile @@ -0,0 +1 @@ +obj-y += koi.o \ No newline at end of file diff --git a/arch/arm64/kernel/koi/koi.c b/arch/arm64/kernel/koi/koi.c new file mode 100644 index 000000000000..716ba16ab358 --- /dev/null +++ b/arch/arm64/kernel/koi/koi.c @@ -0,0 +1,1327 @@ +#include "asm/koi.h" +#include "linux/compiler_attributes.h" +#include "linux/compiler_types.h" +#include "asm/barrier.h" +#include "asm-generic/bug.h" +#include "asm-generic/errno-base.h" +#include "asm-generic/memory_model.h" +#include "asm-generic/pgtable-nop4d.h" +#include "asm-generic/rwonce.h" +#include "asm/pgalloc.h" +#include "asm/memory.h" +#include "linux/bitfield.h" +#include "linux/compiler.h" +#include "linux/types.h" +#include "linux/spinlock.h" +#include "linux/spinlock_types.h" +#include "linux/kernel.h" +#include "linux/rculist.h" +#include "linux/rcupdate.h" +#include "linux/list.h" +#include "asm/current.h" +#include "linux/compiler_types.h" +#include "asm-generic/barrier.h" +#include "asm-generic/rwonce.h" +#include "asm-generic/pgalloc.h" +#include "asm/cpufeature.h" +#include "asm/kvm_hyp.h" +#include "asm/mmu.h" +#include "asm/mmu_context.h" +#include "asm/page-def.h" +#include "asm/pgalloc.h" +#include "asm/pgtable-hwdef.h" +#include "asm/pgtable-types.h" +#include "asm/pgtable.h" +#include "asm/string.h" +#include "asm/sysreg.h" +#include "linux/bitfield.h" +#include "linux/compiler.h" +#include "linux/export.h" +#include "linux/gfp.h" +#include "linux/huge_mm.h" +#include "linux/kallsyms.h" +#include "linux/kconfig.h" +#include "linux/kern_levels.h" +#include "linux/kernel.h" +#include "linux/list.h" +#include "linux/lockdep.h" +#include "linux/mm.h" +#include "linux/mm_types.h" +#include "linux/pgtable.h" +#include "linux/printk.h" +#include "linux/rculist.h" +#include "linux/rcupdate.h" +#include "linux/rmap.h" +#include "linux/sched.h" +#include "linux/stddef.h" +#include "linux/string.h" +#include "linux/swap.h" +#include "linux/swapops.h" +#include "linux/types.h" +#include "linux/slab.h" +#include "linux/string.h" +#include "linux/hashtable.h" + +#define __koi_code __section(".koi.text") +#define __koi_data __section(".data..koi") + +extern unsigned long __koi_code_start[]; +extern unsigned long __koi_code_end[]; +extern unsigned long __koi_data_start[]; +extern unsigned long __koi_data_end[]; +#ifdef CONFIG_IEE +extern unsigned long __iee_si_base_start[]; +extern unsigned long __iee_exec_entry_start[]; +extern unsigned long __iee_exec_entry_end[]; +#endif + +__koi_data unsigned long koi_swapper_ttbr1 = 0; +EXPORT_SYMBOL(koi_swapper_ttbr1); +#define KOI_SWAPPER_MASK 0x0000fffffffffff0 + +__attribute__((aligned(PAGE_SIZE))) +DEFINE_PER_CPU(unsigned long[PAGE_SIZE / sizeof(unsigned long)], + koi_irq_current_ttbr1); +EXPORT_SYMBOL(koi_irq_current_ttbr1); + +extern void koi_switch_to_ko_stack(unsigned long stack_top); +extern void init_ko_mm(struct mm_struct *ko_mm, pgd_t *pgdp); +extern void koi_check_and_switch_context(struct mm_struct *mm); +extern int koi_add_page_mapping(unsigned long dst, unsigned long src); +extern unsigned long _iee_read_token_ttbr1(struct task_struct *tsk); +/** +*struct koi_mem_list - maintain a linked list of free memory in the kernel +*@addr: stating address of this memory +*@size: the size of the memory +*@list: the head of 
the koi_mem_list
+*@rcu: for rcu
+*/
+struct koi_mem_list {
+ unsigned long addr;
+ unsigned long size;
+ struct list_head list;
+ struct rcu_head rcu;
+};
+// Maps a parameter pointer to its copy.
+struct koi_addr_map {
+ unsigned long buffer_addr;
+ unsigned long orig_addr;
+ int offset;
+ struct hlist_node node;
+ struct rcu_head rcu;
+};
+
+DEFINE_HASHTABLE(koi_mem_htbl, HASH_TABLE_BIT);
+EXPORT_SYMBOL(koi_mem_htbl);
+DEFINE_SPINLOCK(koi_mem_htbl_spin_lock);
+EXPORT_SYMBOL(koi_mem_htbl_spin_lock);
+
+EXPORT_SYMBOL(koi_do_switch_to_ko_stack);
+EXPORT_SYMBOL(koi_do_switch_to_kernel_stack);
+
+extern unsigned long long iee_rw_gate(int flag, ...);
+
+/**
+* koi_ttbr_ctor - return the ttbr1 for the given driver module
+*/
+unsigned long koi_ttbr_ctor(struct module *mod)
+{
+ struct koi_mem_hash_node *ko;
+ struct mm_struct *ko_mm = NULL; /* initialize so a failed lookup is detectable */
+ unsigned long ttbr1;
+ unsigned long asid;
+ int bkt;
+ rcu_read_lock();
+ hash_for_each_rcu (koi_mem_htbl, bkt, ko, node) {
+ if (ko->mod == mod) {
+ ko_mm = ko->ko_mm;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ if (!ko_mm) {
+ printk(KERN_ERR "cannot find module %s in koi_mem_htbl",
+ mod->name);
+ return 0;
+ }
+ asm volatile("mrs %0, ttbr0_el1\n":"=r"(asid):);
+ asid &= TTBR_ASID_MASK;
+ ttbr1 = ko->ko_ttbr1 | asid;
+ // koi_check_and_switch_context(ko_mm);
+ // asid = ASID(ko_mm);
+ // ttbr1 = ko->ko_ttbr1 | FIELD_PREP(TTBR_ASID_MASK, asid);
+ return ttbr1;
+}
+EXPORT_SYMBOL(koi_ttbr_ctor);
+// Release a hash node.
+static __maybe_unused void koi_mem_hash_node_free(struct rcu_head *rcu)
+{
+ struct koi_mem_hash_node *node =
+ container_of(rcu, struct koi_mem_hash_node, rcu);
+ kfree(node);
+}
+// Release a free-memory list node.
+static void koi_mem_node_free(struct rcu_head *rcu)
+{
+ struct koi_mem_list *mem_node =
+ container_of(rcu, struct koi_mem_list, rcu);
+ kfree(mem_node);
+}
+// Release a node in koi_addr_map.
+static void koi_addr_map_node_free(struct rcu_head *rcu)
+{
+ struct koi_addr_map *addr_map_node =
+ container_of(rcu, struct koi_addr_map, rcu);
+ kfree(addr_map_node);
+}
+
+#ifndef CONFIG_IEE
+/*
+ * This function is used to switch to the ko's pgtable.
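+ * It returns the module's TTBR1 value when elr_el1 points into a
+ * registered module and the recorded per-task/per-cpu TTBR1 matches,
+ * and 0 when execution should stay on the kernel page table.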
+ */ +__koi_code noinline unsigned long koi_do_switch_to_ko_pgtbl(void) +{ + struct koi_mem_hash_node *ko; + // struct mm_struct *ko_mm; + unsigned long addr; + unsigned long ttbr1, asid; + unsigned long *ptr; + struct task_token *token_addr = + (struct task_token *)((unsigned long)current + + (unsigned long)koi_offset); + int bkt; + asm volatile(" mrs %0, elr_el1\n" : "=r"(addr)); + ptr = SHIFT_PERCPU_PTR(koi_irq_current_ttbr1, __kern_my_cpu_offset()); + rcu_read_lock(); + hash_for_each_rcu (koi_mem_htbl, bkt, ko, node) { + if (ko->mod->init_layout.base != NULL) { + if (addr >= (unsigned long)ko->mod->init_layout.base && + addr < (unsigned long)(ko->mod->init_layout.base + + ko->mod->init_layout.size)) { + if (token_addr->current_ttbr1 == ko->ko_ttbr1 || + *ptr == ko->ko_ttbr1) { + // ko_mm = ko->ko_mm; + // koi_check_and_switch_context(ko_mm); + // asid = ASID(ko_mm); + // ttbr1 = ko->ko_ttbr1; + // ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, asid); + asm volatile("mrs %0, ttbr0_el1\n":"=r"(asid):); + asid &= TTBR_ASID_MASK; + ttbr1 = ko->ko_ttbr1 | asid; + rcu_read_unlock(); + return ttbr1; + } + rcu_read_unlock(); + return 0; + } + } + if (addr >= (unsigned long)ko->mod->core_layout.base && + addr < (unsigned long)ko->mod->core_layout.base + + ko->mod->core_layout.size) { + if (token_addr->current_ttbr1 == ko->ko_ttbr1 || + *ptr == ko->ko_ttbr1) { + // ko_mm = ko->ko_mm; + // koi_check_and_switch_context(ko_mm); + // asid = ASID(ko_mm); + // ttbr1 = ko->ko_ttbr1; + // ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, asid); + asm volatile("mrs %0, ttbr0_el1\n":"=r"(asid):); + asid &= TTBR_ASID_MASK; + ttbr1 = ko->ko_ttbr1 | asid; + rcu_read_unlock(); + return ttbr1; + } + rcu_read_unlock(); + return 0; + } + } + rcu_read_unlock(); + return 0; +} +/** +* koi_do_switch_to_kernel_pgtbl - switch to kernel pagetable +*/ +__koi_code noinline int koi_do_switch_to_kernel_pgtbl(void) +{ + unsigned long curr_ttbr1, asid; + // if (!cpu_online(smp_processor_id())) + // return 0; + asm volatile(" mrs %0, ttbr1_el1\n" : "=r"(curr_ttbr1)); + if ((curr_ttbr1 & KOI_SWAPPER_MASK) == + (koi_swapper_ttbr1 & KOI_SWAPPER_MASK)) { + return 0; + } + if (((curr_ttbr1 & TTBR_ASID_MASK) >> 48) <= 1) { + return 0; + } + asm volatile("mrs %0, ttbr0_el1\n":"=r"(asid):); + asid &= ~USER_ASID_FLAG; + asid &= TTBR_ASID_MASK; + write_sysreg(koi_swapper_ttbr1 | asid, ttbr1_el1); + isb(); + asm volatile(ALTERNATIVE("nop; nop; nop", "ic iallu; dsb nsh; isb", + ARM64_WORKAROUND_CAVIUM_27456)); + return 1; +} +#else +__koi_code noinline unsigned long koi_do_switch_to_ko_pgtbl(void) +{ + struct koi_mem_hash_node *ko; + struct mm_struct *ko_mm; + unsigned long addr, ttbr1, asid, pan_flag, current_ttbr1; + unsigned long *ptr; + int bkt; + asm volatile("mrs %0, pan\n" + "msr pan, 0x0\n" + : "=r"(pan_flag) + :); + current_ttbr1 = _iee_read_token_ttbr1(current); + asm volatile("msr pan, %0\n" : : "r"(pan_flag)); + ptr = SHIFT_PERCPU_PTR(koi_irq_current_ttbr1, __kern_my_cpu_offset()); + if (current_ttbr1 == 0 && *ptr == 0) + return 0; + asm volatile(" mrs %0, elr_el1\n" : "=r"(addr)); + rcu_read_lock(); + hash_for_each_rcu (koi_mem_htbl, bkt, ko, node) { + if (ko->mod->init_layout.base != NULL) { + if (addr >= (unsigned long)ko->mod->init_layout.base && + addr < (unsigned long)(ko->mod->init_layout.base + + ko->mod->init_layout.size)) { + rcu_read_unlock(); + if (current_ttbr1 == ko->ko_ttbr1 || *ptr == ko->ko_ttbr1) { + // ko_mm = ko->ko_mm; + // koi_check_and_switch_context(ko_mm); + // asid = ASID(ko_mm); + // ttbr1 = ko->ko_ttbr1; + // ttbr1 |= 
FIELD_PREP(TTBR_ASID_MASK,
+ // asid);
+ return ko->ko_ttbr1;
+ }
+ return 0;
+ }
+ }
+ if (addr >= (unsigned long)ko->mod->core_layout.base &&
+ addr < (unsigned long)ko->mod->core_layout.base +
+ ko->mod->core_layout.size) {
+ rcu_read_unlock();
+ if (current_ttbr1 == ko->ko_ttbr1 || *ptr == ko->ko_ttbr1) {
+ // ko_mm = ko->ko_mm;
+ // koi_check_and_switch_context(ko_mm);
+ // asid = ASID(ko_mm);
+ // ttbr1 = ko->ko_ttbr1;
+ // ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, asid);
+ return ko->ko_ttbr1;
+ }
+ return 0;
+ }
+ }
+ rcu_read_unlock();
+ return 0;
+}
+
+__koi_code noinline int koi_do_switch_to_kernel_pgtbl(void)
+{
+ unsigned long curr_ttbr1;
+ // if (!cpu_online(smp_processor_id()))
+ // return 0;
+ asm volatile(" mrs %0, ttbr1_el1\n" : "=r"(curr_ttbr1));
+ if ((curr_ttbr1 & KOI_SWAPPER_MASK) ==
+ (koi_swapper_ttbr1 & KOI_SWAPPER_MASK)) {
+ return 0;
+ }
+ if (((curr_ttbr1 & TTBR_ASID_MASK) >> 48) <= 1) {
+ return 0;
+ }
+ iee_rwx_gate_entry(IEE_SWITCH_TO_KERNEL);
+ return 1;
+}
+#endif
+/**
+* koi_save_ttbr - save the ttbr of each driver module
+* @mod: driver module
+* @pgdp: pointer to the driver module's top-level page table (pgd)
+* @node: hash node in which the computed ttbr1 is recorded
+*/
+static void koi_save_ttbr(struct module *mod, pgd_t *pgdp,
+ struct koi_mem_hash_node *node)
+{
+ phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp));
+ if (system_supports_cnp())
+ ttbr1 |= TTBR_CNP_BIT;
+ node->ko_ttbr1 = ttbr1;
+}
+/**
+*koi_normal_page - obtain the corresponding struct page
+*from a given page table entry (pte)
+*/
+struct page *koi_normal_page(pte_t pte)
+{
+ unsigned long pfn = pte_pfn(pte);
+
+ if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) {
+ if (likely(!pte_special(pte)))
+ goto check_pfn;
+ if (is_zero_pfn(pfn)) {
+ printk(KERN_ERR "zero pfn found! pte=0x%16llx\n", pte_val(pte));
+ return NULL;
+ }
+ if (pte_devmap(pte)) {
+ printk(KERN_ERR "pte for dev found! pte=0x%16llx\n",
+ pte_val(pte));
+ return NULL;
+ }
+ return NULL;
+ }
+
+check_pfn:
+ return pfn_to_page(pfn);
+}
+
+/**
+ * Copy one pte. Returns 0 if it succeeded, or -EAGAIN if a preallocated page
+ * is required to copy this pte.
+*/
+static inline int koi_copy_present_pte(pte_t *dst_pte, pte_t *src_pte,
+ unsigned long addr,
+ struct page **prealloc)
+{
+ pte_t pte = *src_pte;
+ struct page *page;
+
+ page = koi_normal_page(pte);
+ if (!page) {
+ printk(KERN_ERR "pte_page unavailable. Impossible.....\n");
+ return -1;
+ }
+
+ set_pte(dst_pte, pte);
+ return 0;
+}
+/**
+* Copy a huge pmd from kernel space to driver space.
+*/
+static int koi_copy_huge_pmd(struct mm_struct *ko_mm, pmd_t *dst_pmd,
+ pmd_t *src_pmd, unsigned long addr)
+{
+ spinlock_t *src_ptl;
+ pmd_t pmd;
+ int ret = -ENOMEM;
+
+ src_ptl = pmd_lockptr(&init_mm, src_pmd);
+ spin_lock_bh(src_ptl);
+
+ ret = -EAGAIN;
+ pmd = *src_pmd;
+
+ set_pte((pte_t *)dst_pmd, pmd_pte(pmd));
+ ret = 0;
+ spin_unlock_bh(src_ptl);
+ return ret;
+}
+
+int __koi_pte_alloc(struct mm_struct *mm, pmd_t *pmd)
+{
+ spinlock_t *ptl;
+ pgtable_t new = pte_alloc_one(mm);
+ if (!new)
+ return -ENOMEM;
+
+ /*
+ * Ensure all pte setup (eg. pte page lock and page clearing) are
+ * visible before the pte is made visible to other CPUs by being
+ * put into page tables.
+ *
+ * The other side of the story is the pointer chasing in the page
+ * table walking code (when walking the page table without locking;
+ * ie. most of the time). Fortunately, these data accesses consist
+ * of a chain of data-dependent loads, meaning most CPUs (alpha
+ * being the notable exception) will already guarantee loads are
+ * seen in-order.
See the alpha page table accessors for the + * smp_rmb() barriers in page table walking code. + */ + smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */ + + ptl = pmd_lockptr(mm, pmd); + spin_lock_bh(ptl); + if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ + #ifdef CONFIG_PTP + pte_t *pte = (pte_t *)page_address(new); + unsigned long iee_addr = __phys_to_iee(__pa(pte)); + set_iee_page_valid(iee_addr); + iee_set_logical_mem_ro((unsigned long)pte); + #endif + mm_inc_nr_ptes(mm); + pmd_populate(mm, pmd, new); + new = NULL; + } + spin_unlock_bh(ptl); + if (new) + pte_free(mm, new); + return 0; +} + +#define koi_pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __koi_pte_alloc(mm, pmd)) + +#define koi_pte_offset_map_lock(mm, pmd, address, ptlp) \ +({ \ + spinlock_t *__ptl = pte_lockptr(mm, pmd); \ + pte_t *__pte = pte_offset_map(pmd, address); \ + *(ptlp) = __ptl; \ + spin_lock_bh(__ptl); \ + __pte; \ +}) + +#define koi_pte_alloc_map_lock(mm, pmd, address, ptlp) \ + (koi_pte_alloc(mm, pmd) ? \ + NULL : koi_pte_offset_map_lock(mm, pmd, address, ptlp)) + +/** +*koi_copy_pte_range - copy pte from kernel space to driver space +*/ +static int koi_copy_pte_range(struct mm_struct *ko_mm, pmd_t *dst_pmd, + pmd_t *src_pmd, unsigned long addr, + unsigned long end) +{ + pte_t *src_pte, *dst_pte; + spinlock_t *src_ptl, *dst_ptl; + int ret = 0; + struct page *prealloc = NULL; +again: + dst_pte = koi_pte_alloc_map_lock(ko_mm, dst_pmd, addr, &dst_ptl); + if (!dst_pte) { + ret = -ENOMEM; + goto out; + } + src_pte = pte_offset_map(src_pmd, addr); + src_ptl = pte_lockptr(&init_mm, src_pmd); + spin_lock_bh(src_ptl); + arch_enter_lazy_mmu_mode(); + + do { + if (pte_none(*src_pte)) + continue; + if (unlikely(!pte_present(*src_pte))) { + continue; + } + /* koi_copy_present_pte() will clear `*prealloc` if consumed */ + ret = koi_copy_present_pte(dst_pte, src_pte, addr, &prealloc); + if (unlikely(ret == -EAGAIN)) + break; + if (unlikely(prealloc)) { + put_page(prealloc); + prealloc = NULL; + } + } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); + arch_leave_lazy_mmu_mode(); + spin_unlock_bh(src_ptl); + spin_unlock_bh(dst_ptl); + + if (ret) { + WARN_ON_ONCE(ret != -EAGAIN); + ret = 0; + } + if (addr != end) + goto again; +out: + if (unlikely(prealloc)) + put_page(prealloc); + return ret; +} + +int __koi_pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) +{ + spinlock_t *ptl; + pmd_t *new = pmd_alloc_one(mm, address); + if (!new) + return -ENOMEM; + + smp_wmb(); /* See comment in __pte_alloc */ + + ptl = pud_lockptr(mm, pud); + spin_lock_bh(ptl); + if (!pud_present(*pud)) { + #ifdef CONFIG_PTP + unsigned long iee_addr = __phys_to_iee(__pa(new)); + set_iee_page_valid(iee_addr); + iee_set_logical_mem_ro((unsigned long)new); + #endif + mm_inc_nr_pmds(mm); + pud_populate(mm, pud, new); + } else /* Another has populated it */ + pmd_free(mm, new); + spin_unlock_bh(ptl); + return 0; +} + +static inline pmd_t *koi_pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) +{ + return (unlikely(pud_none(*pud)) && __koi_pmd_alloc(mm, pud, address))? 
+ NULL: pmd_offset(pud, address);
+}
+
+/**
+*koi_copy_pmd_range - copy pmds from kernel space to driver space
+*/
+static inline int koi_copy_pmd_range(struct mm_struct *ko_mm, pud_t *dst_pud,
+ pud_t *src_pud, unsigned long addr,
+ unsigned long end)
+{
+ pmd_t *src_pmd, *dst_pmd;
+ unsigned long next;
+ int err;
+
+ dst_pmd = koi_pmd_alloc(ko_mm, dst_pud, addr);
+ if (!dst_pmd) {
+ return -ENOMEM;
+ }
+ src_pmd = pmd_offset(src_pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ // CONFIG_TRANSPARENT_HUGEPAGE is enabled, so huge pmds must be copied as well
+ if (is_swap_pmd(*src_pmd) || pmd_trans_huge(*src_pmd) ||
+ (pmd_devmap(*src_pmd))) {
+ err = koi_copy_huge_pmd(ko_mm, dst_pmd, src_pmd, addr);
+ if (err == -ENOMEM)
+ return -ENOMEM;
+ if (!err)
+ continue;
+ }
+ if (pmd_none_or_clear_bad(src_pmd)) {
+ continue;
+ }
+ if (koi_copy_pte_range(ko_mm, dst_pmd, src_pmd, addr, next))
+ return -ENOMEM;
+ } while (dst_pmd++, src_pmd++, addr = next, addr != end);
+ return 0;
+}
+
+int __koi_pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address)
+{
+ pud_t *new = pud_alloc_one(mm, address);
+ if (!new)
+ return -ENOMEM;
+
+ smp_wmb(); /* See comment in __pte_alloc */
+
+ spin_lock_bh(&mm->page_table_lock);
+ if (!p4d_present(*p4d)) {
+ #ifdef CONFIG_PTP
+ unsigned long iee_addr = __phys_to_iee(__pa(new));
+ set_iee_page_valid(iee_addr);
+ iee_set_logical_mem_ro((unsigned long)new);
+ #endif
+ mm_inc_nr_puds(mm);
+ p4d_populate(mm, p4d, new);
+ } else /* Another has populated it */
+ pud_free(mm, new);
+ spin_unlock_bh(&mm->page_table_lock);
+ return 0;
+}
+
+static inline pud_t *koi_pud_alloc(struct mm_struct *mm, p4d_t *p4d,
+ unsigned long address)
+{
+ return (unlikely(p4d_none(*p4d)) && __koi_pud_alloc(mm, p4d, address)) ?
+ NULL : pud_offset(p4d, address);
+}
+
+/**
+*koi_copy_pud_range - copy puds from kernel to driver
+*/
+static inline int koi_copy_pud_range(struct mm_struct *ko_mm, p4d_t *dst_p4d,
+ p4d_t *src_p4d, unsigned long addr,
+ unsigned long end)
+{
+ pud_t *src_pud, *dst_pud;
+ unsigned long next;
+ dst_pud = koi_pud_alloc(ko_mm, dst_p4d, addr);
+ if (!dst_pud)
+ return -ENOMEM;
+ src_pud = pud_offset(src_p4d, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_trans_huge(*src_pud) || pud_devmap(*src_pud)) {
+ continue;
+ /* fall through */
+ }
+ if (pud_none_or_clear_bad(src_pud))
+ continue;
+ if (koi_copy_pmd_range(ko_mm, dst_pud, src_pud, addr, next))
+ return -ENOMEM;
+ } while (dst_pud++, src_pud++, addr = next, addr != end);
+ return 0;
+}
+
+/**
+* koi_copy_p4d_range - map the kernel pagetable into the driver space level by level
+* @ko_mm: the mm_struct of the driver module
+* @dst_pgd: destination pgd
+* @src_pgd: source pgd
+* @addr: the start address
+* @end: the end address
+*/
+static inline int koi_copy_p4d_range(struct mm_struct *ko_mm, pgd_t *dst_pgd,
+ pgd_t *src_pgd, unsigned long addr,
+ unsigned long end)
+{
+ p4d_t *src_p4d, *dst_p4d;
+ unsigned long next;
+ dst_p4d = p4d_alloc(ko_mm, dst_pgd, addr);
+ if (!dst_p4d)
+ return -ENOMEM;
+ src_p4d = p4d_offset(src_pgd, addr);
+ do {
+ next = p4d_addr_end(addr, end);
+ if (p4d_none_or_clear_bad(src_p4d))
+ continue;
+ if (koi_copy_pud_range(ko_mm, dst_p4d, src_p4d, addr, next)) {
+ return -ENOMEM;
+ }
+ } while (dst_p4d++, src_p4d++, addr = next, addr != end);
+ return 0;
+}
+
+/**
+*koi_copy_pagetable - map the address range from "addr" to "end" into the driver pagetable
+*@ko_mm: the mm_struct of the driver module
+*@koi_pg_dir: koi_pg_dir, related to the driver module, the entry for driver
pagetable +*@addr: the starting address of mapping zone +*@end: the end address of mapping zone +*/ +int koi_copy_pagetable(struct mm_struct *ko_mm, pgd_t *koi_pg_dir, + unsigned long addr, unsigned long end) +{ + int ret = 0; + unsigned long next; + + pgd_t *src_pgd, *dst_pgd; + + src_pgd = pgd_offset_pgd(swapper_pg_dir, addr); + dst_pgd = pgd_offset_pgd(koi_pg_dir, addr); + do { + next = pgd_addr_end(addr, end); + if (pgd_none_or_clear_bad(src_pgd)) + continue; + if (unlikely(koi_copy_p4d_range(ko_mm, dst_pgd, src_pgd, addr, + next))) { + ret = -ENOMEM; + break; + } + } while (dst_pgd++, src_pgd++, addr = next, addr != end); + + return ret; +} + +void koi_set_rdonly(unsigned long addr, pgd_t *pgdir) +{ + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + if (pgd_none(*pgdp) || pgd_bad(*pgdp)) { + return; + } + + p4dp = p4d_offset(pgdp, addr); + if (p4d_none(*p4dp) || p4d_bad(*p4dp)) { + return; + } + + pudp = pud_offset(p4dp, addr); + if (pud_none(*pudp) || pud_bad(*pudp)) { + return; + } + pmdp = pmd_offset(pudp, addr); + if (pmd_none(*pmdp) || pmd_bad(*pmdp)) { + return; + } + + ptep = pte_offset_kernel(pmdp, addr); + if (pte_none(*ptep)) { + printk(KERN_ERR "ptep 0x%llx not available\n", ptep); + return; + } + set_pte(ptep, __pte(pte_val(*ptep) | PTE_RDONLY)); + printk(KERN_ERR "set_readonly successfully\n"); + return; +} + +/** +* koi_create_pagetable - create pagetable for driver +* @mod: driver module +* 1.create a new koi_mem_hash_node new_node +* 2.create page table return the pgd address, init the new_node->pgdp +* 3.create and init the new_node->ko_mm +* 4.map swapper_ttbr1 to the newly created pagetable +* 5.map the interrupt vector table to the newly created pagetable +* 6.map the init_layout of the module +* 7.map the core_layout of the module +* 8.map switch_to_kernel_pgtable into driver view +* 9.map share memory +*/ +void koi_create_pagetable(struct module *mod) +{ + int ret = 0, cpu; + unsigned long vbar, addr, ttbr1; + pgd_t *pgdp; + unsigned long *ptr; + struct koi_mem_list *new_mem_node; + struct koi_mem_hash_node *new_node = + kzalloc(sizeof(struct koi_mem_hash_node), GFP_KERNEL); + if (!new_node) { + printk(KERN_ERR "NULL new_node\n"); + return; + }; + if (koi_swapper_ttbr1 == 0) { + pgdp = lm_alias(swapper_pg_dir); + ttbr1 = phys_to_ttbr(virt_to_phys(pgdp)); + if (system_supports_cnp() && + !WARN_ON(pgdp != lm_alias(swapper_pg_dir))) + ttbr1 |= TTBR_CNP_BIT; +#ifdef CONFIG_IEE + ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, 1); +#endif + koi_swapper_ttbr1 = ttbr1; + // __WRITE_ONCE(koi_swapper_ttbr1, ttbr1); + // koi_set_rdonly(&koi_swapper_ttbr1, swapper_pg_dir); + } + new_node->pgdp = koi_pgd_alloc(); + new_node->ko_mm = + kzalloc(sizeof(struct mm_struct) + + sizeof(unsigned long) * BITS_TO_LONGS(NR_CPUS), + GFP_KERNEL); + init_ko_mm(new_node->ko_mm, new_node->pgdp); + new_node->mod = mod; + koi_save_ttbr(mod, new_node->pgdp, new_node); + printk(KERN_ERR "copying koi_data, start=0x%16llx, end=0x%16llx\n", + (unsigned long)__koi_data_start, (unsigned long)__koi_data_end); + // copy koi_swapper_ttbr1, which records page dir base for kernel view + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, + (unsigned long)__koi_data_start, + (unsigned long)__koi_data_end); + asm volatile("mrs %0, VBAR_EL1\n" : "=r"(vbar) :); + + // copy interrupt vectors + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, vbar & PAGE_MASK, + (vbar + PAGE_SIZE) & PAGE_MASK); + + // copy module init_layout, which contains init data and text in 
driver + ret = koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, + (unsigned long)mod->init_layout.base, + (unsigned long)mod->init_layout.base + + mod->init_layout.size); + if (ret != 0) + printk(KERN_ERR + "\033[33mError occur when copying init_layout, Eno:%d\033[0m\n", + ret); + + // copy module core_layout, which contains non-init data and text in driver + ret = koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, + (unsigned long)mod->core_layout.base, + (unsigned long)mod->core_layout.base + + mod->core_layout.size); + if (ret != 0) + printk(KERN_ERR + "\033[33mError occur when copying core_layout, Eno: %d\033[0m\n", + ret); + + // mapping switch_to_kernel_pgtable into driver view, which is used to switch to kernel view when entering INT + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, + (unsigned long)__koi_code_start, + (unsigned long)__koi_code_end); + + for_each_possible_cpu (cpu) { + ptr = per_cpu(irq_stack_ptr, cpu); + printk(KERN_ERR + "\033[33mirq_stack_ptr on cpu %d addr=0x%16llx, end=0x%16llx\033[0m\n", + cpu, (unsigned long)ptr, + (unsigned long)ptr + IRQ_STACK_SIZE); + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, + (unsigned long)ptr, + (unsigned long)ptr + IRQ_STACK_SIZE); + } + + for_each_possible_cpu (cpu) { + ptr = per_cpu(koi_irq_current_ttbr1, cpu); + printk(KERN_ERR + "\033[33mirq_current_ptr on cpu %d addr=0x%16llx, end=0x%16llx\033[0m\n", + cpu, (unsigned long)ptr, (unsigned long)ptr + PAGE_SIZE); + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, + (unsigned long)ptr, + (unsigned long)ptr + PAGE_SIZE); + } + +#ifdef CONFIG_IEE + // mapping iee_rwx_gate_entry and iee_si_base to ko's pagetable + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, + (unsigned long)__iee_si_base_start, + (unsigned long)__iee_exec_entry_end); +#endif + + // alloc 16KB memory for new ko, and add it into hashtable + addr = (unsigned long)kmalloc(THREAD_SIZE, GFP_KERNEL); + if ((void *)addr == NULL) { + printk(KERN_ERR "alloc buffer error\n"); + } + koi_copy_pagetable(new_node->ko_mm, new_node->pgdp, addr, + addr + THREAD_SIZE); + + new_mem_node = kmalloc(sizeof(struct koi_mem_list), GFP_KERNEL); + if (new_mem_node == NULL) { + printk(KERN_ERR "alloc new_mem_node error\n"); + } + new_mem_node->addr = addr; + new_mem_node->size = THREAD_SIZE; + + new_node->mem_list_head = + (struct list_head)LIST_HEAD_INIT(new_node->mem_list_head); + hash_init(new_node->addr_htbl); + spin_lock_init(&new_node->addr_htbl_spin_lock); + spin_lock_init(&new_node->spin_lock); + + spin_lock(&new_node->spin_lock); + list_add_rcu(&new_mem_node->list, &new_node->mem_list_head); + spin_unlock(&new_node->spin_lock); + + spin_lock(&koi_mem_htbl_spin_lock); + hash_add_rcu(koi_mem_htbl, &new_node->node, + (unsigned long)new_node->mod); + spin_unlock(&koi_mem_htbl_spin_lock); +} +/** +* koi_mem_alloc +*@mod: driver module +*@orig_addr: the starting address of the parameter in kernel +*@size: the size of the parameter +*/ +unsigned long koi_mem_alloc(struct module *mod, unsigned long orig_addr, + unsigned long size) +{ + struct koi_mem_hash_node *target = NULL; + struct koi_mem_list *mem_node; + struct koi_addr_map *new_addr_node; + unsigned long addr = 0, flags; + struct koi_mem_list *new_mem_node; + rcu_read_lock(); + hash_for_each_possible_rcu (koi_mem_htbl, target, node, + (unsigned long)mod) { + if (target->mod == mod) { + break; + } + } + rcu_read_unlock(); + if (target == NULL) { + printk(KERN_ERR "mem node for module: %s not found\n", + mod->name); + return 0; + } + 
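+ // First-fit scan: carve "size" bytes off the head of the first free
+ // chunk that is large enough, replacing the shrunken node under RCU.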
+/**
+* koi_mem_alloc
+*@mod: driver module
+*@orig_addr: the starting address of the parameter in the kernel
+*@size: the size of the parameter
+*/
+unsigned long koi_mem_alloc(struct module *mod, unsigned long orig_addr,
+			    unsigned long size)
+{
+	struct koi_mem_hash_node *target = NULL;
+	struct koi_mem_list *mem_node;
+	struct koi_addr_map *new_addr_node;
+	unsigned long addr = 0, flags;
+	struct koi_mem_list *new_mem_node;
+	rcu_read_lock();
+	hash_for_each_possible_rcu (koi_mem_htbl, target, node,
+				    (unsigned long)mod) {
+		if (target->mod == mod) {
+			break;
+		}
+	}
+	rcu_read_unlock();
+	if (target == NULL) {
+		printk(KERN_ERR "mem node for module: %s not found\n",
+		       mod->name);
+		return 0;
+	}
+
+	spin_lock_irqsave(&target->spin_lock, flags);
+	list_for_each_entry_rcu (mem_node, &target->mem_list_head, list) {
+		if (mem_node->size >= size) {
+			addr = mem_node->addr;
+			mem_node->size -= size;
+			if (mem_node->size == 0) {
+				list_del_rcu(&mem_node->list);
+			} else {
+				new_mem_node =
+					kmalloc(sizeof(struct koi_mem_list),
+						GFP_ATOMIC);
+				new_mem_node->addr = addr + size;
+				new_mem_node->size = mem_node->size;
+				list_replace_rcu(&mem_node->list,
+						 &new_mem_node->list);
+			}
+			call_rcu(&mem_node->rcu, koi_mem_node_free);
+			/* take the first block that fits */
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&target->spin_lock, flags);
+	if (!addr) {
+		if (size > THREAD_SIZE) {
+			return 0;
+		}
+		addr = (unsigned long)kmalloc(THREAD_SIZE, GFP_KERNEL);
+		if ((void *)addr == NULL) {
+			return 0;
+		}
+		koi_copy_pagetable(target->ko_mm, target->pgdp, addr,
+				   addr + THREAD_SIZE);
+		mem_node = kmalloc(sizeof(struct koi_mem_list), GFP_KERNEL);
+		if (!mem_node) {
+			printk(KERN_ERR "NULL mem_node\n");
+			return 0;
+		}
+		mem_node->addr = addr + size;
+		mem_node->size = THREAD_SIZE - size;
+		spin_lock_irqsave(&target->spin_lock, flags);
+		list_add_tail_rcu(&mem_node->list, &target->mem_list_head);
+		spin_unlock_irqrestore(&target->spin_lock, flags);
+	}
+
+	new_addr_node = kzalloc(sizeof(struct koi_addr_map), GFP_KERNEL);
+	if (!new_addr_node)
+		return 0;
+	new_addr_node->buffer_addr = addr;
+	new_addr_node->orig_addr = orig_addr;
+	spin_lock_irqsave(&target->addr_htbl_spin_lock, flags);
+	hash_add_rcu(target->addr_htbl, &new_addr_node->node,
+		     new_addr_node->buffer_addr);
+	spin_unlock_irqrestore(&target->addr_htbl_spin_lock, flags);
+	return addr;
+}
+EXPORT_SYMBOL(koi_mem_alloc);
+// find the original parameter pointer corresponding to a copy
+noinline void *koi_mem_lookup(struct module *mod, unsigned long addr)
+{
+	struct koi_mem_hash_node *target = NULL;
+	struct koi_addr_map *addr_map_node;
+	unsigned long orig_addr = addr;
+	rcu_read_lock();
+	hash_for_each_possible_rcu (koi_mem_htbl, target, node,
+				    (unsigned long)mod) {
+		if (target->mod == mod) {
+			break;
+		}
+	}
+	rcu_read_unlock();
+	if (target == NULL) {
+		printk(KERN_ERR "mem node for module: %s not found\n",
+		       mod->name);
+		return NULL;
+	}
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu (target->addr_htbl, addr_map_node, node,
+				    orig_addr) {
+		if (addr_map_node->buffer_addr == addr) {
+			break;
+		}
+	}
+	rcu_read_unlock();
+	if (addr_map_node) {
+		return (void *)(addr_map_node->orig_addr);
+	} else {
+		return NULL;
+	}
+}
+EXPORT_SYMBOL(koi_mem_lookup);
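+/*
+ * Pairing sketch (illustrative, simplified): a call stub copies a
+ * kernel-side argument into a driver-visible buffer before the call and
+ * translates it back afterwards. koi_mem_alloc() only reserves and maps
+ * the buffer; the caller performs the copy.
+ *
+ *	unsigned long buf = koi_mem_alloc(mod, (unsigned long)arg,
+ *					  sizeof(*arg));
+ *	memcpy((void *)buf, arg, sizeof(*arg));
+ *	// ... call into the driver, passing buf ...
+ *	struct foo *orig = koi_mem_lookup(mod, buf);	// yields arg again
+ */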
+/**
+* koi_mem_free - recycle the copy of a parameter and synchronize it back
+* @mod: driver module
+* @addr: the starting address of the parameter copy
+* @size: the size of the parameter
+* @is_const: whether the pointer is const
+* @count: the number of variadic offset arguments that follow
+*/
+noinline void koi_mem_free(struct module *mod, unsigned long addr,
+			   unsigned long size, bool is_const, int count, ...)
+{
+	struct koi_mem_hash_node *target = NULL;
+	struct koi_mem_list *mem_node;
+	struct list_head *pos = NULL;
+	struct koi_addr_map *addr_map_node;
+	unsigned long orig_size = size;
+	unsigned long orig_addr = addr;
+	va_list valist;
+	int i;
+	unsigned int offset;
+	unsigned long flags;
+	rcu_read_lock();
+	hash_for_each_possible_rcu (koi_mem_htbl, target, node,
+				    (unsigned long)mod) {
+		if (target->mod == mod) {
+			break;
+		}
+	}
+	rcu_read_unlock();
+	if (target == NULL) {
+		printk(KERN_ERR "mem node for module: %s not found\n",
+		       mod->name);
+		return;
+	}
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu (target->addr_htbl, addr_map_node, node,
+				    orig_addr) {
+		if (addr_map_node->buffer_addr == orig_addr) {
+			break;
+		}
+	}
+	rcu_read_unlock();
+	if (addr_map_node == NULL) {
+		printk(KERN_ERR "addr_map_node for 0x%lx not found\n",
+		       orig_addr);
+		return;
+	}
+	va_start(valist, count);
+	for (i = 0; i < count; i++) {
+		offset = va_arg(valist, int);
+		*(unsigned long *)(addr_map_node->buffer_addr + offset) =
+			*(unsigned long *)(addr_map_node->orig_addr + offset);
+	}
+	va_end(valist);
+	memcpy((void *)addr_map_node->orig_addr,
+	       (void *)addr_map_node->buffer_addr, orig_size);
+
+	spin_lock_irqsave(&target->addr_htbl_spin_lock, flags);
+	hlist_del_init_rcu(&addr_map_node->node);
+	call_rcu(&addr_map_node->rcu, koi_addr_map_node_free);
+	spin_unlock_irqrestore(&target->addr_htbl_spin_lock, flags);
+
+	spin_lock_irqsave(&target->spin_lock, flags);
+	list_for_each_entry_rcu (mem_node, &target->mem_list_head, list) {
+		if (mem_node->addr + mem_node->size == addr) {
+			pos = mem_node->list.prev;
+			addr = mem_node->addr;
+			size += mem_node->size;
+			list_del_rcu(&mem_node->list);
+			call_rcu(&mem_node->rcu, koi_mem_node_free);
+		} else if (addr + size == mem_node->addr) {
+			if (!pos)
+				pos = mem_node->list.prev;
+			size += mem_node->size;
+			list_del_rcu(&mem_node->list);
+			call_rcu(&mem_node->rcu, koi_mem_node_free);
+		} else if (addr + size < mem_node->addr) {
+			if (!pos)
+				pos = mem_node->list.prev;
+			break;
+		}
+	}
+	mem_node = kzalloc(sizeof(struct koi_mem_list), GFP_ATOMIC);
+	if (!mem_node) {
+		spin_unlock_irqrestore(&target->spin_lock, flags);
+		return;
+	}
+	mem_node->addr = addr;
+	mem_node->size = size;
+	if (pos)
+		list_add_rcu(&mem_node->list, pos);
+	else
+		list_add_tail_rcu(&mem_node->list, &target->mem_list_head);
+	spin_unlock_irqrestore(&target->spin_lock, flags);
+}
+EXPORT_SYMBOL(koi_mem_free);
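+/*
+ * Model of the free-path coalescing above: a returned block
+ * [addr, addr+size) is merged with any adjacent free block so the list
+ * keeps maximal extents, e.g. when [a, a+s) is freed:
+ *
+ *	[x, a) free, [a, a+s) freed	->  one node [x, a+s)
+ *	[a, a+s) freed, [a+s, e) free	->  one node [a, e)
+ */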
+/**
+* koi_mem_free_callback - recycle the copy of a parameter via a callback
+* @mod: driver module
+* @addr: the address of the parameter
+* @size: the size of the parameter
+* @func: callback used to release the original parameter pointer
+*/
+noinline void koi_mem_free_callback(struct module *mod, unsigned long addr,
+				    unsigned long size, void (*func)(void *))
+{
+	struct koi_mem_hash_node *target = NULL;
+	struct koi_mem_list *mem_node;
+	struct list_head *pos = NULL;
+	struct koi_addr_map *addr_map_node;
+	unsigned long flags;
+	unsigned long orig_size = size;
+	unsigned long orig_addr = addr;
+	rcu_read_lock();
+	hash_for_each_possible_rcu (koi_mem_htbl, target, node,
+				    (unsigned long)mod) {
+		if (target->mod == mod) {
+			break;
+		}
+	}
+	rcu_read_unlock();
+	if (target == NULL) {
+		printk(KERN_ERR "mem node for module: %s not found\n",
+		       mod->name);
+		return;
+	}
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu (target->addr_htbl, addr_map_node, node,
+				    orig_addr) {
+		if (addr_map_node->buffer_addr == orig_addr) {
+			break;
+		}
+	}
+	rcu_read_unlock();
+	if (addr_map_node != NULL) {
+		memcpy((void *)addr_map_node->orig_addr,
+		       (void *)addr_map_node->buffer_addr, orig_size);
+		func((void *)addr_map_node->orig_addr);
+	} else {
+		printk(KERN_ERR "Cannot find addr_map_node in addr_htbl; addr may be in kernel space\n");
+		func((void *)orig_addr);
+	}
+
+	spin_lock_irqsave(&target->addr_htbl_spin_lock, flags);
+	if (addr_map_node != NULL) {
+		hlist_del_init_rcu(&addr_map_node->node);
+		call_rcu(&addr_map_node->rcu, koi_addr_map_node_free);
+	}
+	spin_unlock_irqrestore(&target->addr_htbl_spin_lock, flags);
+	spin_lock_irqsave(&target->spin_lock, flags);
+	list_for_each_entry_rcu (mem_node, &target->mem_list_head, list) {
+		if (mem_node->addr + mem_node->size == addr) {
+			pos = mem_node->list.prev;
+			addr = mem_node->addr;
+			size += mem_node->size;
+			list_del_rcu(&mem_node->list);
+			call_rcu(&mem_node->rcu, koi_mem_node_free);
+		} else if (addr + size == mem_node->addr) {
+			if (!pos)
+				pos = mem_node->list.prev;
+			size += mem_node->size;
+			list_del_rcu(&mem_node->list);
+			call_rcu(&mem_node->rcu, koi_mem_node_free);
+		} else if (addr + size < mem_node->addr) {
+			if (!pos)
+				pos = mem_node->list.prev;
+			break;
+		}
+	}
+	mem_node = kzalloc(sizeof(struct koi_mem_list), GFP_ATOMIC);
+	if (!mem_node) {
+		spin_unlock_irqrestore(&target->spin_lock, flags);
+		return;
+	}
+	mem_node->addr = addr;
+	mem_node->size = size;
+	if (pos)
+		list_add_rcu(&mem_node->list, pos);
+	else
+		list_add_tail_rcu(&mem_node->list, &target->mem_list_head);
+	spin_unlock_irqrestore(&target->spin_lock, flags);
+}
+EXPORT_SYMBOL(koi_mem_free_callback);
+
+void koi_map_mem(struct module *mod, unsigned long addr, unsigned long size)
+{
+	struct koi_mem_hash_node *target = NULL;
+	rcu_read_lock();
+	hash_for_each_possible_rcu (koi_mem_htbl, target, node,
+				    (unsigned long)mod) {
+		if (target->mod == mod)
+			break;
+	}
+	rcu_read_unlock();
+
+	if (target == NULL) {
+		printk(KERN_ERR "mem node for module: %s not found\n",
+		       mod->name);
+		return;
+	}
+	koi_copy_pagetable(target->ko_mm, target->pgdp, addr & PAGE_MASK,
+			   (addr + size + PAGE_SIZE) & PAGE_MASK);
+}
+EXPORT_SYMBOL(koi_map_mem);
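+/*
+ * Usage sketch (hypothetical buffer name, not from this patch):
+ * koi_map_mem() is the page-granular fallback for data handed to the
+ * driver in place rather than copied through koi_mem_alloc().
+ *
+ *	koi_map_mem(mod, (unsigned long)ring_buf, ring_len);
+ */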
+/**
+* koi_mem_free_to_user - copy a parameter back to user space; the
+* driver-view counterpart of 'copy_to_user'
+*/
+void koi_mem_free_to_user(struct module *mod, unsigned long addr,
+			  unsigned long size)
+{
+	struct koi_mem_hash_node *target = NULL;
+	struct koi_mem_list *mem_node;
+	struct list_head *pos = NULL;
+	struct koi_addr_map *addr_map_node;
+	unsigned long flags;
+	unsigned long orig_size = size;
+	unsigned long orig_addr = addr;
+	rcu_read_lock();
+	hash_for_each_possible_rcu (koi_mem_htbl, target, node,
+				    (unsigned long)mod) {
+		if (target->mod == mod) {
+			break;
+		}
+	}
+	rcu_read_unlock();
+	if (target == NULL) {
+		printk(KERN_ERR "mem node for module: %s not found\n",
+		       mod->name);
+		return;
+	}
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu (target->addr_htbl, addr_map_node, node,
+				    orig_addr) {
+		if (addr_map_node->buffer_addr == orig_addr) {
+			break;
+		}
+	}
+	rcu_read_unlock();
+	if (addr_map_node == NULL) {
+		printk(KERN_ERR "addr_map_node for 0x%lx not found\n",
+		       orig_addr);
+		return;
+	}
+	if (copy_to_user((void *)addr_map_node->orig_addr,
+			 (void *)addr_map_node->buffer_addr, orig_size)) {
+		return;
+	}
+
+	spin_lock_irqsave(&target->addr_htbl_spin_lock, flags);
+	hlist_del_init_rcu(&addr_map_node->node);
+	call_rcu(&addr_map_node->rcu, koi_addr_map_node_free);
+	spin_unlock_irqrestore(&target->addr_htbl_spin_lock, flags);
+	spin_lock_irqsave(&target->spin_lock, flags);
+	list_for_each_entry_rcu (mem_node, &target->mem_list_head, list) {
+		if (mem_node->addr + mem_node->size == addr) {
+			pos = mem_node->list.prev;
+			addr = mem_node->addr;
+			size += mem_node->size;
+			list_del_rcu(&mem_node->list);
+			call_rcu(&mem_node->rcu, koi_mem_node_free);
+		} else if (addr + size == mem_node->addr) {
+			if (!pos)
+				pos = mem_node->list.prev;
+			size += mem_node->size;
+			list_del_rcu(&mem_node->list);
+			call_rcu(&mem_node->rcu, koi_mem_node_free);
+		} else if (addr + size < mem_node->addr) {
+			if (!pos)
+				pos = mem_node->list.prev;
+			break;
+		}
+	}
+	mem_node = kzalloc(sizeof(struct koi_mem_list), GFP_ATOMIC);
+	if (!mem_node) {
+		spin_unlock_irqrestore(&target->spin_lock, flags);
+		return;
+	}
+	mem_node->addr = addr;
+	mem_node->size = size;
+	if (pos)
+		list_add_rcu(&mem_node->list, pos);
+	else
+		list_add_tail_rcu(&mem_node->list, &target->mem_list_head);
+	spin_unlock_irqrestore(&target->spin_lock, flags);
+}
+EXPORT_SYMBOL(koi_mem_free_to_user);
+// allocate a per-task driver stack and map it into the driver view
+void koi_map_kostack(struct module *mod)
+{
+	struct koi_mem_hash_node *target = NULL;
+	void *koi_stack;
+	unsigned long cur_sp;
+	asm volatile("mov %0, sp\n" : "=r"(cur_sp) :);
+	if (on_irq_stack(cur_sp, NULL)) {
+		return;
+	}
+#ifndef CONFIG_IEE
+	struct task_token *token_addr =
+		(struct task_token *)((unsigned long)current +
+				      (unsigned long)koi_offset);
+	if (token_addr->koi_stack_base != NULL)
+		return;
+#else
+	koi_stack = iee_rw_gate(IEE_READ_KOI_STACK, current);
+	if (koi_stack != NULL)
+		return;
+#endif
+	koi_stack =
+		(void *)__get_free_pages(THREADINFO_GFP & ~__GFP_ACCOUNT, 3);
+	if (!koi_stack)
+		return;
+	free_pages((unsigned long)koi_stack + 4 * PAGE_SIZE, 2);
+	printk(KERN_ERR "alloc dstack start=0x%016lx, end=0x%016lx\n",
+	       (unsigned long)koi_stack,
+	       (unsigned long)koi_stack + THREAD_SIZE);
+#ifndef CONFIG_IEE
+	token_addr->koi_stack =
+		(struct pt_regs *)(THREAD_SIZE + (unsigned long)koi_stack) - 1;
+	token_addr->koi_stack_base = koi_stack;
+#else
+	iee_rw_gate(
+		IEE_WRITE_KOI_STACK, current,
+		(unsigned long)((struct pt_regs *)(THREAD_SIZE +
+						   (unsigned long)koi_stack) -
+				1));
+	iee_rw_gate(IEE_WRITE_KOI_STACK_BASE, current,
+		    (unsigned long)koi_stack);
+#endif
+	rcu_read_lock();
+	hash_for_each_possible_rcu (koi_mem_htbl, target, node,
+				    (unsigned long)mod) {
+		if (target->mod == mod) {
+			break;
+		}
+	}
+	rcu_read_unlock();
+	if (target == NULL) {
+		printk(KERN_ERR "mem node for module: %s not found\n",
+		       mod->name);
+		return;
+	}
+	koi_copy_pagetable(target->ko_mm, target->pgdp,
+			   (unsigned long)koi_stack,
+			   (unsigned long)koi_stack + THREAD_SIZE);
+	printk(KERN_ERR "create ko stack: 0x%016lx\n",
+	       (unsigned long)koi_stack);
+}
+EXPORT_SYMBOL(koi_map_kostack);
\ No newline at end of file
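+/*
+ * Note on the stack allocation above (a reading of the code, assuming 4K
+ * pages and THREAD_SIZE == 16KB): __get_free_pages(..., 3) returns a
+ * naturally aligned 32KB block, and the upper half is released at once,
+ * leaving a 16KB driver stack whose base is 32KB-aligned.
+ *
+ *	base = __get_free_pages(gfp, 3);	// 32KB, 32KB-aligned
+ *	free_pages(base + 4 * PAGE_SIZE, 2);	// keep [base, base+16KB)
+ */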
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index cea96ee75d22..cbddc8e464e4 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -79,8 +79,13 @@ int memcmp_pages(struct page *page1, struct page *page2)
 static inline void __mte_enable_kernel(const char *mode, unsigned long tcf)
 {
 	/* Enable MTE Sync Mode for EL1. */
+#ifdef CONFIG_IEE
+	sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_TCF_MASK,
+				SYS_FIELD_PREP(SCTLR_EL1, TCF, tcf));
+#else
 	sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF_MASK,
 			 SYS_FIELD_PREP(SCTLR_EL1, TCF, tcf));
+#endif
 	isb();
 
 	pr_info_once("MTE: enabled in %s mode at EL1\n", mode);
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 068e5bb2661b..c98bc1a24fc7 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -467,11 +467,24 @@ static void ssbs_thread_switch(struct task_struct *next)
  * This is *only* for exception entry from EL0, and is not valid until we
  * __switch_to() a user task.
  */
+#ifdef CONFIG_IEE
+// Put __entry_task in an isolated page to protect it.
+__attribute__((aligned(PAGE_SIZE))) DEFINE_PER_CPU(struct task_struct *[PAGE_SIZE/sizeof(struct task_struct *)], __entry_task);
+extern void iee_write_entry_task(struct task_struct *tsk);
+#else
 DEFINE_PER_CPU(struct task_struct *, __entry_task);
+#endif
 
 static void entry_task_switch(struct task_struct *next)
 {
+	#ifdef CONFIG_IEE
+	if (next == &init_task)
+		iee_write_entry_task((struct task_struct *)__va(__pa_symbol(next)));
+	else
+		iee_write_entry_task(next);
+	#else
 	__this_cpu_write(__entry_task, next);
+	#endif
 }
 
 /*
@@ -506,11 +519,15 @@ static void erratum_1418040_new_exec(void)
  */
 void update_sctlr_el1(u64 sctlr)
 {
-	/* 
+	/*
 	 * EnIA must not be cleared while in the kernel as this is necessary for
 	 * in-kernel PAC. It will be cleared on kernel exit if needed.
 	 */
+	#ifdef CONFIG_IEE
+	sysreg_clear_set_iee_si(sctlr_el1, SCTLR_USER_MASK & ~SCTLR_ELx_ENIA, sctlr);
+	#else
 	sysreg_clear_set(sctlr_el1, SCTLR_USER_MASK & ~SCTLR_ELx_ENIA, sctlr);
+	#endif
 
 	/* ISB required for the kernel uaccess routines when setting TCF0. */
 	isb();
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index 58a97861bfc5..c7839247327d 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -551,7 +551,11 @@ static enum mitigation_state spectre_v4_enable_hw_mitigation(void)
 		return state;
 
 	if (spectre_v4_mitigations_off()) {
+#ifdef CONFIG_IEE
+		sysreg_clear_set_iee_si(sctlr_el1, 0, SCTLR_ELx_DSSBS);
+#else
 		sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS);
+#endif
 		set_pstate_ssbs(1);
 		return SPECTRE_VULNERABLE;
 	}
@@ -975,7 +979,11 @@ static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot)
 	if (arm64_kernel_unmapped_at_el0())
 		return;
 
+#ifdef CONFIG_IEE
+	iee_rwx_gate_entry(IEE_WRITE_vbar_el1, v);
+#else
 	write_sysreg(v, vbar_el1);
+#endif
 	isb();
 }
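+/*
+ * Aside (illustrative; the patch shows sysreg_clear_set_iee_si taking
+ * the same arguments as sysreg_clear_set): the recurring #ifdef pattern
+ * that routes SCTLR_EL1 updates through the IEE gate could be captured
+ * once in a wrapper such as:
+ *
+ *	#ifdef CONFIG_IEE
+ *	#define sctlr_clear_set(clr, set) \
+ *		sysreg_clear_set_iee_si(sctlr_el1, clr, set)
+ *	#else
+ *	#define sctlr_clear_set(clr, set) \
+ *		sysreg_clear_set(sctlr_el1, clr, set)
+ *	#endif
+ */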
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 95cb22c083c8..df73a583a733 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -33,6 +33,11 @@
 #include <linux/scs.h>
 #include <linux/mm.h>
 
+#ifdef CONFIG_IEE
+#include <linux/iee-func.h>
+#include <asm/iee-si.h>
+#endif
+
 #include <asm/acpi.h>
 #include <asm/fixmap.h>
 #include <asm/cpu.h>
@@ -335,10 +340,30 @@ u64 cpu_logical_map(unsigned int cpu)
 	return __cpu_logical_map[cpu];
 }
 
+#ifdef CONFIG_IEE
+/* used for secure modification of vbar */
+extern char __bp_harden_el1_vectors[];
+/* prepare the iee rwx gate for the scenario where ttbr1 = init_pg_dir */
+static void __init iee_si_init_early(void)
+{
+	/* prepare data used by the iee rwx gate */
+	iee_base_idmap_pg_dir = phys_to_ttbr(__pa_symbol(idmap_pg_dir));
+	iee_base_reserved_pg_dir = phys_to_ttbr(__pa_symbol(reserved_pg_dir))
+				   | FIELD_PREP(TTBR_ASID_MASK, 1);
+	iee_base__bp_harden_el1_vectors = (unsigned long)__bp_harden_el1_vectors;
+	iee_si_tcr = 0;
+}
+#endif
+
 void __init __no_sanitize_address setup_arch(char **cmdline_p)
 {
 	setup_initial_init_mm(_stext, _etext, _edata, _end);
 
+	#ifdef CONFIG_IEE
+	init_new_context(&init_task, &init_mm);
+	atomic64_set(&init_mm.context.id, (1UL << get_cpu_asid_bits()) | INIT_ASID);
+	#endif
+
 	*cmdline_p = boot_command_line;
 
 	kaslr_init();
@@ -371,6 +396,14 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
 	 */
 	local_daif_restore(DAIF_PROCCTX_NOIRQ);
 
+#ifdef CONFIG_IEE
+	/*
+	 * Map iee si codes to init_pg_dir to run the following
+	 * cpu_uninstall_idmap() which writes ttbr0.
+	 */
+	iee_si_init_early();
+#endif
+
 	/*
 	 * TTBR0 is only used for the identity mapping at this stage. Make it
 	 * point to zero page to avoid speculatively fetching new entries.
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index b7b7afb4a8c7..168a9390d6e9 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -902,6 +902,32 @@ const char *esr_get_class_string(unsigned long esr)
 	return esr_class_str[ESR_ELx_EC(esr)];
 }
 
+#ifdef CONFIG_IEE
+extern void arm64_enter_nmi(struct pt_regs *regs);
+static const char *handler[] = {
+	"SP_EL0",
+	"ELR_EL1",
+	"TCR_EL1",
+	"TTBR0 ASID",
+	"IEE_SI"
+};
+
+asmlinkage void notrace iee_bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
+{
+	arm64_enter_nmi(regs);
+
+	console_verbose();
+
+	pr_crit("IEE: Bad mode in %s check detected on CPU%d, code 0x%08x -- %s\n",
+		handler[reason], smp_processor_id(), esr,
+		esr_get_class_string(esr));
+
+	__show_regs(regs);
+	local_daif_mask();
+	panic("bad mode");
+}
+#endif
+
 /*
  * bad_el0_sync handles unexpected, but potentially recoverable synchronous
  * exceptions taken from EL0.
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 3cd7e76cc562..287eaba7a15b 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -134,6 +134,52 @@ jiffies = jiffies_64;
 #define UNWIND_DATA_SECTIONS
 #endif
 
+#ifdef CONFIG_IEE
+#define IEE_TEXT			\
+	. = ALIGN(PAGE_SIZE);		\
+	__iee_code_start = .;		\
+	*(.iee.text.header)		\
+	*(.iee.text)			\
+	. = ALIGN(PAGE_SIZE);		\
+	__iee_code_end = .;
+#else
+#define IEE_TEXT
+#endif
+
+#ifdef CONFIG_IEE
+#define IEE_SI_TEXT			\
+	. = ALIGN(PAGE_SIZE);		\
+	__iee_si_data_start = .;	\
+	*(.iee.si_data)			\
+	. = ALIGN(PAGE_SIZE);		\
+	__iee_exec_entry_start = .;	\
+	__iee_si_no_irq = . + (16);	\
+	*(.iee.exec_entry)		\
+	. = ALIGN(PAGE_SIZE);		\
+	__iee_si_start = .;		\
+	*(.iee.si_text)			\
+	. = ALIGN(PAGE_SIZE);		\
+	. += PAGE_SIZE - (24);		\
+	__iee_si_end = . + (24);	\
+	__iee_exec_exit = .;		\
+	*(.iee.exec_exit)		\
+	. = ALIGN(PAGE_SIZE);
+
+#else
+#define IEE_SI_TEXT
+#endif
+
+#ifdef CONFIG_KOI
+#define KOI_TEXT			\
+	. = ALIGN(PAGE_SIZE);		\
+	__koi_code_start = .;		\
+	*(.koi.text)			\
+	. = ALIGN(PAGE_SIZE);		\
+	__koi_code_end = .;
+#else
+#define KOI_TEXT
+#endif
+
 /*
  * The size of the PE/COFF section that covers the kernel image, which
  * runs from _stext to _edata, must be a round multiple of the PE/COFF
@@ -176,10 +222,13 @@ SECTIONS
 			SOFTIRQENTRY_TEXT
 			ENTRY_TEXT
 			TEXT_TEXT
+			IEE_TEXT
 			SCHED_TEXT
 			LOCK_TEXT
 			KPROBES_TEXT
 			HYPERVISOR_TEXT
+			IEE_SI_TEXT
+			KOI_TEXT
 			*(.gnu.warning)
 	}
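+/*
+ * Note: each IEE/KOI text region above is bracketed by page-aligned
+ * start/end symbols so the whole region can be mapped into another page
+ * table, or repermissioned, in whole pages, as the KOI code does:
+ *
+ *	koi_copy_pagetable(ko_mm, pgdp, (unsigned long)__koi_code_start,
+ *			   (unsigned long)__koi_code_end);
+ */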
@@ -318,6 +367,18 @@ SECTIONS
 	. += INIT_DIR_SIZE;
 	init_pg_end = .;
 
+	#ifdef CONFIG_IEE
+	. = ALIGN(PAGE_SIZE*8);
+	init_iee_stack_begin = .;
+	. += PAGE_SIZE*4;
+	init_iee_stack_end = .;
+
+	. = ALIGN(PAGE_SIZE);
+	init_iee_si_stack_begin = .;
+	. += PAGE_SIZE*4;
+	init_iee_si_stack_end = .;
+	#endif
+
 	. = ALIGN(SEGMENT_ALIGN);
 	__pecoff_data_size = ABSOLUTE(. - __initdata_begin);
 	_end = .;
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 188197590fc9..97e1f86046cf 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -17,6 +17,10 @@
 #include <asm/smp.h>
 #include <asm/tlbflush.h>
 
+#ifdef CONFIG_IEE
+#include <asm/iee-si.h>
+#endif
+
 static u32 asid_bits;
 static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
 
@@ -39,7 +43,11 @@ static unsigned long *pinned_asid_map;
 #define asid2ctxid(asid, genid)	((asid) | (genid))
 
 /* Get the ASIDBits supported by the current CPU */
+#ifdef CONFIG_IEE
+u32 get_cpu_asid_bits(void)
+#else
 static u32 get_cpu_asid_bits(void)
+#endif
 {
 	u32 asid;
 	int fld = cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64MMFR0_EL1),
@@ -212,6 +220,38 @@ static u64 new_context(struct mm_struct *mm)
 	return asid2ctxid(asid, generation);
 }
 
+#ifdef CONFIG_KOI
+/*
+ * This function checks and allocates an ASID for a KO's pgd.
+ * The mm MUST point to an isolated KO's mm_struct; any other use is
+ * undefined behaviour.
+ */
+void koi_check_and_switch_context(struct mm_struct *mm)
+{
+	u64 asid = atomic64_read(&mm->context.id);
+	u64 old_active_asid;
+	unsigned long flags;
+	unsigned int cpu;
+
+	old_active_asid = atomic64_read(this_cpu_ptr(&active_asids));
+	if (old_active_asid && asid_gen_match(asid) &&
+	    atomic64_cmpxchg_relaxed(this_cpu_ptr(&active_asids),
+				     old_active_asid, asid)) {
+		return;
+	}
+
+	raw_spin_lock_irqsave(&cpu_asid_lock, flags);
+	asid = atomic64_read(&mm->context.id);
+	if (!asid_gen_match(asid)) {
+		asid = new_context(mm);
+		atomic64_set(&mm->context.id, asid);
+	}
+
+	cpu = smp_processor_id();
+	if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
+		local_flush_tlb_all();
+
+	atomic64_set(this_cpu_ptr(&active_asids), asid);
+	raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
+}
+#endif
+
 void check_and_switch_context(struct mm_struct *mm)
 {
 	unsigned long flags;
@@ -348,7 +388,9 @@ asmlinkage void post_ttbr_update_workaround(void)
 
 void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm)
 {
+	#ifndef CONFIG_IEE
 	unsigned long ttbr1 = read_sysreg(ttbr1_el1);
+	#endif
 	unsigned long asid = ASID(mm);
 	unsigned long ttbr0 = phys_to_ttbr(pgd_phys);
 
@@ -360,14 +402,28 @@ void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm)
 	if (IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN))
 		ttbr0 |= FIELD_PREP(TTBR_ASID_MASK, asid);
 
-	/* Set ASID in TTBR1 since TCR.A1 is set */
+	#ifdef CONFIG_IEE
+	ttbr0 |= FIELD_PREP(TTBR_ASID_MASK, asid+1);
+	iee_rwx_gate_entry(IEE_CONTEXT_SWITCH, ttbr0);
+	// TODO: handle the case where both CONFIG_IEE and CONFIG_KOI are defined
+	#else
+	/* Set ASID in TTBR0 since TCR.A1 is set to 0 */
+
+	#ifdef CONFIG_KOI
+	ttbr0 |= FIELD_PREP(TTBR_ASID_MASK, asid+1);
+	ttbr1 &= ~TTBR_ASID_MASK;
+	ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, asid);
+	#else
 	ttbr1 &= ~TTBR_ASID_MASK;
 	ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, asid);
-
+
+	#endif
 	cpu_set_reserved_ttbr0_nosync();
 	write_sysreg(ttbr1, ttbr1_el1);
 	write_sysreg(ttbr0, ttbr0_el1);
 	isb();
+	#endif
+
 	post_ttbr_update_workaround();
 }
 
@@ -375,11 +431,21 @@ static int asids_update_limit(void)
 {
 	unsigned long num_available_asids = NUM_USER_ASIDS;
 
-	if (arm64_kernel_unmapped_at_el0()) {
-		num_available_asids /= 2;
-		if (pinned_asid_map)
-			set_kpti_asid_bits(pinned_asid_map);
-	}
+	#if defined(CONFIG_IEE) ||
defined(CONFIG_KOI) + num_available_asids /= 2; + if (pinned_asid_map) { + unsigned int len = BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(unsigned long); + memset(pinned_asid_map, 0xaa, len); + __set_bit(INIT_ASID, pinned_asid_map); + } + #else + if (arm64_kernel_unmapped_at_el0()) { + num_available_asids /= 2; + if (pinned_asid_map) + set_kpti_asid_bits(pinned_asid_map); + } + #endif + /* * Expect allocation after rollover to fail if we don't have at least * one more ASID than CPUs. ASID #0 is reserved for init_mm. @@ -400,6 +466,10 @@ arch_initcall(asids_update_limit); static int asids_init(void) { + #if defined(CONFIG_IEE) || defined(CONFIG_KOI) + unsigned int len; + #endif + asid_bits = get_cpu_asid_bits(); atomic64_set(&asid_generation, ASID_FIRST_VERSION); asid_map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL); @@ -410,6 +480,11 @@ static int asids_init(void) pinned_asid_map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL); nr_pinned_asids = 0; + #if defined(CONFIG_IEE) || defined(CONFIG_KOI) + len = BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(unsigned long); + memset(asid_map, 0xaa, len); + __set_bit(INIT_ASID, asid_map); + #else /* * We cannot call set_reserved_asid_bits() here because CPU * caps are not finalized yet, so it is safer to assume KPTI @@ -417,6 +492,8 @@ static int asids_init(void) */ if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) set_kpti_asid_bits(asid_map); + #endif + return 0; } early_initcall(asids_init); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 4ea07caba71c..de1d57c2b30f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -261,7 +261,11 @@ int __ptep_set_access_flags(struct vm_area_struct *vma, pteval ^= PTE_RDONLY; pteval |= pte_val(entry); pteval ^= PTE_RDONLY; + #ifdef CONFIG_PTP + pteval = iee_set_cmpxchg_relaxed(ptep, old_pteval, pteval); + #else pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval); + #endif } while (pteval != old_pteval); /* Invalidate a stale read-only entry */ @@ -376,8 +380,13 @@ static void do_tag_recovery(unsigned long addr, unsigned long esr, * It will be done lazily on the other CPUs when they will hit a * tag fault. 
*/ + #ifdef CONFIG_IEE + sysreg_clear_set_iee_si(sctlr_el1, SCTLR_EL1_TCF_MASK, + SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF, NONE)); + #else sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF_MASK, SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF, NONE)); + #endif isb(); } diff --git a/arch/arm64/mm/fixmap.c b/arch/arm64/mm/fixmap.c index bfc02568805a..580ecb596d2d 100644 --- a/arch/arm64/mm/fixmap.c +++ b/arch/arm64/mm/fixmap.c @@ -32,6 +32,22 @@ static pte_t bm_pte[NR_BM_PTE_TABLES][PTRS_PER_PTE] __page_aligned_bss; static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; +#ifdef CONFIG_IEE +void *bm_pte_addr = (void *)bm_pte; +void *bm_pmd_addr = (void *)bm_pmd; +void *bm_pud_addr = (void *)bm_pud; +#endif + +#ifdef CONFIG_PTP +extern void __iee_p4d_populate_pre_init(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot); +extern void __iee_pud_populate_pre_init(pud_t *pudp, phys_addr_t pmdp, pudval_t prot); +extern void __iee_pmd_populate_pre_init(pmd_t *pmdp, phys_addr_t ptep, + pmdval_t prot); + +extern void iee_set_p4d_pre_init(p4d_t *p4dp, p4d_t p4d); +#define set_pgd_init(pgdptr, pgdval) iee_set_p4d_pre_init((p4d_t *)(pgdptr), (p4d_t) { pgdval }) +#endif + static inline pte_t *fixmap_pte(unsigned long addr) { return &bm_pte[BM_PTE_TABLE_IDX(addr)][pte_index(addr)]; @@ -44,7 +60,11 @@ static void __init early_fixmap_init_pte(pmd_t *pmdp, unsigned long addr) if (pmd_none(pmd)) { ptep = bm_pte[BM_PTE_TABLE_IDX(addr)]; + #ifdef CONFIG_PTP + __iee_pmd_populate_pre_init(pmdp, __pa_symbol(ptep), PMD_TYPE_TABLE); + #else __pmd_populate(pmdp, __pa_symbol(ptep), PMD_TYPE_TABLE); + #endif } } @@ -55,8 +75,13 @@ static void __init early_fixmap_init_pmd(pud_t *pudp, unsigned long addr, pud_t pud = READ_ONCE(*pudp); pmd_t *pmdp; - if (pud_none(pud)) + if (pud_none(pud)) { + #ifdef CONFIG_PTP + __iee_pud_populate_pre_init(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE); + #else __pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE); + #endif + } pmdp = pmd_offset_kimg(pudp, addr); do { @@ -82,8 +107,13 @@ static void __init early_fixmap_init_pud(p4d_t *p4dp, unsigned long addr, BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); } - if (p4d_none(p4d)) + if (p4d_none(p4d)) { + #ifdef CONFIG_PTP + __iee_p4d_populate_pre_init(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE); + #else __p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE); + #endif + } pudp = pud_offset_kimg(p4dp, addr); early_fixmap_init_pmd(pudp, addr, end); @@ -106,6 +136,27 @@ void __init early_fixmap_init(void) early_fixmap_init_pud(p4dp, addr, end); } +#ifdef CONFIG_PTP +extern void iee_set_pte_pre_init(pte_t *ptep, pte_t pte); +void __iee_set_fixmap_pre_init(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags) +{ + unsigned long addr = __fix_to_virt(idx); + pte_t *ptep; + + BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); + + ptep = fixmap_pte(addr); + + if (pgprot_val(flags)) { + iee_set_pte_pre_init(ptep, pfn_pte(phys >> PAGE_SHIFT, flags)); + } else { + iee_set_pte_pre_init(ptep, __pte(0)); + flush_tlb_kernel_range(addr, addr+PAGE_SIZE); + } +} +#endif + /* * Unusually, this is also called in IRQ context (ghes_iounmap_irq) so if we * ever need to use IPIs for TLB broadcasting, then we're in trouble here. 
@@ -121,9 +172,17 @@ void __set_fixmap(enum fixed_addresses idx, ptep = fixmap_pte(addr); if (pgprot_val(flags)) { + #ifdef CONFIG_PTP + iee_set_bm_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags)); + #else __set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags)); + #endif } else { + #ifdef CONFIG_PTP + iee_set_bm_pte(ptep, __pte(0)); + #else __pte_clear(&init_mm, addr, ptep); + #endif flush_tlb_kernel_range(addr, addr+PAGE_SIZE); } } @@ -179,8 +238,13 @@ void __init fixmap_copy(pgd_t *pgdir) * live in the carveout for the swapper_pg_dir. We can simply * re-use the existing dir for the fixmap. */ + #ifdef CONFIG_PTP + set_pgd_init(pgd_offset_pgd(pgdir, FIXADDR_TOT_START), + READ_ONCE(*pgd_offset_k(FIXADDR_TOT_START))); + #else set_pgd(pgd_offset_pgd(pgdir, FIXADDR_TOT_START), READ_ONCE(*pgd_offset_k(FIXADDR_TOT_START))); + #endif } else if (CONFIG_PGTABLE_LEVELS > 3) { pgd_t *bm_pgdp; p4d_t *bm_p4dp; @@ -194,9 +258,15 @@ void __init fixmap_copy(pgd_t *pgdir) BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); bm_pgdp = pgd_offset_pgd(pgdir, FIXADDR_TOT_START); bm_p4dp = p4d_offset(bm_pgdp, FIXADDR_TOT_START); + #ifdef CONFIG_PTP + bm_pudp = pud_set_fixmap_offset_init(bm_p4dp, FIXADDR_TOT_START); + __iee_pud_populate_pre_init(bm_pudp, __pa(lm_alias(bm_pmd)), PMD_TYPE_TABLE); + pud_clear_fixmap_init(); + #else bm_pudp = pud_set_fixmap_offset(bm_p4dp, FIXADDR_TOT_START); pud_populate(&init_mm, bm_pudp, lm_alias(bm_pmd)); pud_clear_fixmap(); + #endif } else { BUG(); } diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 8c8d7653ba84..4190f5c10d68 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -55,8 +55,19 @@ * that cannot be mistaken for a real physical address. */ s64 memstart_addr __ro_after_init = -1; +#if defined(CONFIG_IEE) || defined(CONFIG_KOI) +s64 memstart_addr_init __ro_after_init = -1; +#endif +#ifdef CONFIG_KOI +s64 koi_offset __ro_after_init = -1; +EXPORT_SYMBOL(koi_offset); +#endif +#ifdef CONFIG_IEE +s64 iee_offset __ro_after_init = -1; +#endif EXPORT_SYMBOL(memstart_addr); + /* * If the corresponding config options are enabled, we create both ZONE_DMA * and ZONE_DMA32. By default ZONE_DMA covers the 32-bit addressable memory @@ -421,7 +432,11 @@ early_param("memmap", parse_memmap_opt); void __init arm64_memblock_init(void) { + #if defined(CONFIG_IEE) || defined(CONFIG_KOI) + s64 linear_region_size = BIT(vabits_actual - 2); + #else s64 linear_region_size = PAGE_END - _PAGE_OFFSET(vabits_actual); + #endif /* * Corner case: 52-bit VA capable systems running KVM in nVHE mode may @@ -438,13 +453,24 @@ void __init arm64_memblock_init(void) } /* Remove memory above our supported physical address size */ + #ifdef CONFIG_IEE + // If config iee, phys size can not be above 0x400000000000 + if(__pa_symbol(_end) > BIT_ULL(vabits_actual - 2)) + panic("Image on too high phys mem.\n"); + else + memblock_remove(BIT_ULL(vabits_actual - 2), ULLONG_MAX); + #else memblock_remove(1ULL << PHYS_MASK_SHIFT, ULLONG_MAX); + #endif /* * Select a suitable value for the base of physical memory. 
*/ memstart_addr = round_down(memblock_start_of_DRAM(), ARM64_MEMSTART_ALIGN); + #if defined(CONFIG_IEE) || defined(CONFIG_KOI) + memstart_addr_init = memstart_addr; + #endif if ((memblock_end_of_DRAM() - memstart_addr) > linear_region_size) pr_warn("Memory doesn't fit in the linear mapping, VA_BITS too small\n"); @@ -531,6 +557,14 @@ void __init arm64_memblock_init(void) ((range * memstart_offset_seed) >> 16); } } + + #ifdef CONFIG_KOI + koi_offset = memstart_addr - memstart_addr_init + KOI_OFFSET; + #endif + #ifdef CONFIG_IEE + iee_offset = memstart_addr - memstart_addr_init + ((unsigned long)BIT(vabits_actual - 2)); + #endif + //printk(KERN_ERR "koi_offset: 0x%16llx\n", koi_offset); /* * Register the kernel text, kernel data, initrd, and initial diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 4142a75a414e..094f3798441d 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -6,6 +6,7 @@ * Copyright (C) 2012 ARM Ltd. */ +#include "asm/pgtable.h" #include <linux/cache.h> #include <linux/export.h> #include <linux/kernel.h> @@ -40,6 +41,11 @@ #include <asm/tlbflush.h> #include <asm/pgalloc.h> #include <asm/kfence.h> +#ifdef CONFIG_IEE +#include <linux/iee-func.h> +#include <asm/iee.h> +#include <asm/iee-si.h> +#endif #define NO_BLOCK_MAPPINGS BIT(0) #define NO_CONT_MAPPINGS BIT(1) @@ -76,8 +82,282 @@ EXPORT_SYMBOL(empty_zero_page); static DEFINE_SPINLOCK(swapper_pgdir_lock); static DEFINE_MUTEX(fixmap_lock); +#ifdef CONFIG_IEE +extern struct cred init_cred; + +extern unsigned long init_iee_stack_begin[]; +extern unsigned long init_iee_stack_end[]; +extern unsigned long __iee_si_data_start[]; +extern unsigned long __iee_exec_entry_start[]; +extern unsigned long __iee_si_start[]; +extern unsigned long __iee_si_end[]; + +extern void *bm_pte_addr; +extern void *bm_pmd_addr; +extern void *bm_pud_addr; + +#ifdef CONFIG_PTP + +/* Funcs to set pgtable before iee initialized. */ +static void iee_set_swapper_pgd_pre_init(pgd_t *pgdp, pgd_t pgd) +{ + pgd_t *fixmap_pgdp; + + spin_lock(&swapper_pgdir_lock); + fixmap_pgdp = pgd_set_fixmap_init(__pa_symbol(pgdp)); + WRITE_ONCE(*fixmap_pgdp, pgd); + /* + * We need dsb(ishst) here to ensure the page-table-walker sees + * our new entry before set_p?d() returns. The fixmap's + * flush_tlb_kernel_range() via clear_fixmap() does this for us. + */ + pgd_clear_fixmap_init(); + spin_unlock(&swapper_pgdir_lock); +} + +void iee_set_p4d_pre_init(p4d_t *p4dp, p4d_t p4d) +{ + if (in_swapper_pgdir(p4dp)) { + iee_set_swapper_pgd_pre_init((pgd_t *)p4dp, __pgd(p4d_val(p4d))); + return; + } + + WRITE_ONCE(*p4dp, p4d); + dsb(ishst); + isb(); +} + +static inline void iee_set_pud_pre_init(pud_t *pudp, pud_t pud) +{ +#ifdef __PAGETABLE_PUD_FOLDED + if (in_swapper_pgdir(pudp)) { + iee_set_swapper_pgd_pre_init((pgd_t *)pudp, __pgd(pud_val(pud))); + return; + } +#endif /* __PAGETABLE_PUD_FOLDED */ +#ifdef CONFIG_KOI + pudval_t val = pud_val(pud); + if (pud_valid(pud) && !(val & PUD_TABLE_BIT)) { + // There is no PUD_SEC_NG, so we use PMD_SECT_NG instead. 
+ pud = __pud(val | PMD_SECT_NG); + } +#endif + WRITE_ONCE(*pudp, pud); + + if (pud_valid(pud)) { + dsb(ishst); + isb(); + } +} + +static inline void iee_set_pmd_pre_init(pmd_t *pmdp, pmd_t pmd) +{ +#ifdef __PAGETABLE_PMD_FOLDED + if (in_swapper_pgdir(pmdp)) { + iee_set_swapper_pgd_pre_init((pgd_t *)pmdp, __pgd(pmd_val(pmd))); + return; + } +#endif /* __PAGETABLE_PMD_FOLDED */ +#ifdef CONFIG_KOI + pmdval_t val = pmd_val(pmd); + if (pmd_valid(pmd) && !(val & PMD_TABLE_BIT)) { + pmd = __pmd(val | PMD_SECT_NG); + } +#endif + WRITE_ONCE(*pmdp, pmd); + + if (pmd_valid(pmd)) { + dsb(ishst); + isb(); + } +} + + +void __iee_p4d_populate_pre_init(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) +{ + iee_set_p4d_pre_init(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot)); +} + +void __iee_pud_populate_pre_init(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) +{ + iee_set_pud_pre_init(pudp, __pud(__phys_to_pud_val(pmdp) | prot)); +} + +void __iee_pmd_populate_pre_init(pmd_t *pmdp, phys_addr_t ptep, + pmdval_t prot) +{ + iee_set_pmd_pre_init(pmdp, __pmd(__phys_to_pmd_val(ptep) | prot)); +} + +/* Funcs to set fixmap before iee initialized. */ +bool pgattr_change_is_safe(u64 old, u64 new); +static int iee_pud_set_huge_fixmap(pud_t *pudp, phys_addr_t phys, pgprot_t prot) +{ + pud_t new_pud = pfn_pud(__phys_to_pfn(phys), mk_pud_sect_prot(prot)); + + /* Only allow permission changes for now */ + if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)), + pud_val(new_pud))) + return 0; + + VM_BUG_ON(phys & ~PUD_MASK); + iee_set_fixmap_pud_pre_init(pudp, new_pud); + return 1; +} + +static int iee_pmd_set_huge_fixmap(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) +{ + pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), mk_pmd_sect_prot(prot)); + + /* Only allow permission changes for now */ + if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)), + pmd_val(new_pmd))) + return 0; + + VM_BUG_ON(phys & ~PMD_MASK); + iee_set_fixmap_pmd_pre_init(pmdp, new_pmd); + return 1; +} + +static inline void __iee_pmd_populate_fixmap(pmd_t *pmdp, phys_addr_t ptep, + pmdval_t prot) +{ + iee_set_fixmap_pmd_pre_init(pmdp, __pmd(__phys_to_pmd_val(ptep) | prot)); +} + +static inline void __iee_pud_populate_fixmap(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) +{ + iee_set_fixmap_pud_pre_init(pudp, __pud(__phys_to_pud_val(pmdp) | prot)); +} +#endif /* END CONFIG_PTP*/ + +void iee_set_pte_pre_init(pte_t *ptep, pte_t pte) +{ +#ifdef CONFIG_KOI + if (!pte_none(pte)) { + pte = __pte(pte_val(pte) | PTE_NG); + } +#endif + WRITE_ONCE(*ptep, pte); + + /* + * Only if the new pte is valid and kernel, otherwise TLB maintenance + * or update_mmu_cache() have the necessary barriers. 
+ */ + if (pte_valid_not_user(pte)) { + dsb(ishst); + isb(); + } +} + +static void __init iee_set_token_page_valid_pre_init(void *token, void *new) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, (unsigned long)token); + + p4d_t *p4dp = p4d_offset(pgdp, (unsigned long)token); + + pud_t *pudp = pud_offset(p4dp, (unsigned long)token); + + pmd_t *pmdp = pmd_offset(pudp, (unsigned long)token); + + pte_t *ptep = pte_offset_kernel(pmdp, (unsigned long)token); + pte_t pte = READ_ONCE(*ptep); + pte = __pte(((pte_val(pte) | 0x1) & ~PTE_ADDR_MASK) | __phys_to_pte_val(__pa(new))); + iee_set_pte_pre_init(ptep, pte); + flush_tlb_kernel_range((unsigned long)token, (unsigned long)(token+PAGE_SIZE)); + isb(); +} +#endif /* END CONFIG_IEE*/ + +#if defined(CONFIG_KOI) && !defined(CONFIG_IEE) +int koi_add_page_mapping(unsigned long dst, unsigned long src) +{ + pgd_t *src_pgdp, *dst_pgdp; + p4d_t *src_p4dp, *dst_p4dp; + pud_t *src_pudp, *dst_pudp; + pmd_t *src_pmdp, *dst_pmdp; + pte_t *src_ptep, *dst_ptep; + + src_pgdp = pgd_offset_pgd(swapper_pg_dir, src); + dst_pgdp = pgd_offset_pgd(swapper_pg_dir, dst); + + src_p4dp = p4d_offset(src_pgdp, src); + dst_p4dp = p4d_alloc(&init_mm, dst_pgdp, dst); + if (!dst_p4dp) { + return -ENOMEM; + } + src_pudp = pud_offset(src_p4dp, src); + dst_pudp = pud_alloc(&init_mm, dst_p4dp, dst); + if (!dst_pudp) { + return -ENOMEM; + } + if (pud_val(*src_pudp) & PMD_TABLE_BIT) { + src_pmdp = pmd_offset(src_pudp, src); + dst_pmdp = pmd_alloc(&init_mm, dst_pudp, dst); + if (!dst_pmdp) { + return -ENOMEM; + } + if (pmd_val(*src_pmdp) & PMD_TABLE_BIT) { + src_ptep = pte_offset_kernel(src_pmdp, src); + dst_ptep = pte_alloc_map(&init_mm, dst_pmdp, dst); + set_pte(dst_ptep, *src_ptep); + } else { + set_pte((pte_t *)dst_pmdp, pmd_pte(*src_pmdp)); + } + } else { + set_pte((pte_t *)dst_pudp, pud_pte(*src_pudp)); + } + + + flush_tlb_kernel_range(dst, dst+PAGE_SIZE); + isb(); + return 0; +} + +void koi_remove_page_mapping(unsigned long addr) { + pgd_t *src_pgdp; + p4d_t *src_p4dp; + pud_t *src_pudp; + pmd_t *src_pmdp; + pte_t *src_ptep; + + src_pgdp = pgd_offset_pgd(swapper_pg_dir, addr); + if (pgd_none(*src_pgdp) || pgd_bad(*src_pgdp)) + return; + src_p4dp = p4d_offset(src_pgdp, addr); + if (p4d_none(*src_p4dp) || p4d_bad(*src_p4dp)) + return; + src_pudp = pud_offset(src_p4dp, addr); + if (pud_none(*src_pudp)) + return; + if (pud_val(*src_pudp) & PMD_TABLE_BIT) { + src_pmdp = pmd_offset(src_pudp, addr); + if (pmd_none(*src_pmdp)) + return; + if (pmd_val(*src_pmdp) & PMD_TABLE_BIT) { + src_ptep = pte_offset_kernel(src_pmdp, addr); + if(!pte_none(*src_ptep)) + pte_clear(&init_mm, addr, src_ptep); + } else { + pmd_clear(src_pmdp); + } + } else { + pud_clear(src_pudp); + } + + flush_tlb_kernel_range(addr, addr+PAGE_SIZE); + isb(); +} +#endif + void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd) { + #ifdef CONFIG_PTP + spin_lock(&swapper_pgdir_lock); + iee_rw_gate(IEE_OP_SET_SWAPPER_PGD, pgdp, pgd); + spin_unlock(&swapper_pgdir_lock); + #else pgd_t *fixmap_pgdp; spin_lock(&swapper_pgdir_lock); @@ -90,6 +370,7 @@ void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd) */ pgd_clear_fixmap(); spin_unlock(&swapper_pgdir_lock); + #endif } pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, @@ -118,7 +399,11 @@ static phys_addr_t __init early_pgtable_alloc(int shift) * slot will be free, so we can (ab)use the FIX_PTE slot to initialise * any level of table. 
*/ + #ifdef CONFIG_PTP + ptr = pte_set_fixmap_init(phys); + #else ptr = pte_set_fixmap(phys); + #endif memset(ptr, 0, PAGE_SIZE); @@ -126,11 +411,16 @@ static phys_addr_t __init early_pgtable_alloc(int shift) * Implicit barriers also ensure the zeroed page is visible to the page * table walker */ + #ifdef CONFIG_PTP + pte_clear_fixmap_init(); + #else pte_clear_fixmap(); + #endif return phys; } + bool pgattr_change_is_safe(u64 old, u64 new) { /* @@ -178,7 +468,11 @@ static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end, do { pte_t old_pte = __ptep_get(ptep); + #ifdef CONFIG_PTP + iee_set_fixmap_pte_pre_init(ptep, pfn_pte(__phys_to_pfn(phys), prot)); + #else __set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot)); + #endif /* * After the PTE entry has been populated once, we @@ -211,7 +505,11 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, pmdval |= PMD_TABLE_PXN; BUG_ON(!pgtable_alloc); pte_phys = pgtable_alloc(PAGE_SHIFT); + #ifdef CONFIG_PTP + __iee_pmd_populate_fixmap(pmdp, pte_phys, pmdval); + #else __pmd_populate(pmdp, pte_phys, pmdval); + #endif pmd = READ_ONCE(*pmdp); } BUG_ON(pmd_bad(pmd)); @@ -248,7 +546,11 @@ static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end, /* try section mapping first */ if (((addr | next | phys) & ~PMD_MASK) == 0 && (flags & NO_BLOCK_MAPPINGS) == 0) { + #ifdef CONFIG_PTP + iee_pmd_set_huge_fixmap(pmdp, phys, prot); + #else pmd_set_huge(pmdp, phys, prot); + #endif /* * After the PMD entry has been populated once, we @@ -289,7 +591,11 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, pudval |= PUD_TABLE_PXN; BUG_ON(!pgtable_alloc); pmd_phys = pgtable_alloc(PMD_SHIFT); - __pud_populate(pudp, pmd_phys, pudval); + #ifdef CONFIG_PTP + __iee_pud_populate_fixmap(pudp, pmd_phys, PUD_TYPE_TABLE); + #else + __pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE); + #endif pud = READ_ONCE(*pudp); } BUG_ON(pud_bad(pud)); @@ -345,7 +651,11 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, if (pud_sect_supported() && ((addr | next | phys) & ~PUD_MASK) == 0 && (flags & NO_BLOCK_MAPPINGS) == 0) { + #ifdef CONFIG_PTP + iee_pud_set_huge_fixmap(pudp, phys, prot); + #else pud_set_huge(pudp, phys, prot); + #endif /* * After the PUD entry has been populated once, we @@ -374,6 +684,10 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, { unsigned long addr, end, next; pgd_t *pgdp = pgd_offset_pgd(pgdir, virt); + #ifdef CONFIG_IEE + p4d_t *p4dp; + p4d_t p4d; + #endif /* * If the virtual and physical address don't have the same offset @@ -390,10 +704,14 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, next = pgd_addr_end(addr, end); alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc, flags); + #ifdef CONFIG_IEE + p4dp = p4d_offset(pgdp, addr); + p4d = READ_ONCE(*p4dp); + __p4d_populate(p4dp, __p4d_to_phys(p4d), (PGD_APT | PUD_TYPE_TABLE)); + #endif phys += next - addr; } while (pgdp++, addr = next, addr != end); } - static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, @@ -413,162 +731,844 @@ void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, phys_addr_t (*pgtable_alloc)(int), int flags); #endif -static phys_addr_t __pgd_pgtable_alloc(int shift) +#ifdef CONFIG_PTP +static int __init iee_pud_set_huge_pre_init(pud_t *pudp, phys_addr_t phys, pgprot_t prot) { - void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL); - BUG_ON(!ptr); + pud_t 
new_pud = pfn_pud(__phys_to_pfn(phys), mk_pud_sect_prot(prot)); - /* Ensure the zeroed page is visible to the page table walker */ - dsb(ishst); - return __pa(ptr); + /* Only allow permission changes for now */ + if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)), + pud_val(new_pud))) + return 0; + + VM_BUG_ON(phys & ~PUD_MASK); + iee_set_pud_pre_init(pudp, new_pud); + return 1; } -static phys_addr_t pgd_pgtable_alloc(int shift) +static int __init iee_pmd_set_huge_pre_init(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) { - phys_addr_t pa = __pgd_pgtable_alloc(shift); - struct ptdesc *ptdesc = page_ptdesc(phys_to_page(pa)); - - /* - * Call proper page table ctor in case later we need to - * call core mm functions like apply_to_page_range() on - * this pre-allocated page table. - * - * We don't select ARCH_ENABLE_SPLIT_PMD_PTLOCK if pmd is - * folded, and if so pagetable_pte_ctor() becomes nop. - */ - if (shift == PAGE_SHIFT) - BUG_ON(!pagetable_pte_ctor(ptdesc)); - else if (shift == PMD_SHIFT) - BUG_ON(!pagetable_pmd_ctor(ptdesc)); + pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), mk_pmd_sect_prot(prot)); - return pa; -} + /* Only allow permission changes for now */ + if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)), + pmd_val(new_pmd))) + return 0; -/* - * This function can only be used to modify existing table entries, - * without allocating new levels of table. Note that this permits the - * creation of new section or page entries. - */ -void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, - phys_addr_t size, pgprot_t prot) -{ - if (virt < PAGE_OFFSET) { - pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", - &phys, virt); - return; - } - __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, - NO_CONT_MAPPINGS); + VM_BUG_ON(phys & ~PMD_MASK); + iee_set_pmd_pre_init(pmdp, new_pmd); + return 1; } -void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, - unsigned long virt, phys_addr_t size, - pgprot_t prot, bool page_mappings_only) +static __init void iee_init_pte_pre_init(pmd_t *pmdp, unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot) { - int flags = 0; - - BUG_ON(mm == &init_mm); + pte_t *ptep; - if (page_mappings_only) - flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + ptep = pte_set_fixmap_offset_init(pmdp, addr); + do { + pte_t old_pte = READ_ONCE(*ptep); - __create_pgd_mapping(mm->pgd, phys, virt, size, prot, - pgd_pgtable_alloc, flags); -} + iee_set_pte_pre_init(ptep, pfn_pte(__phys_to_pfn(phys), prot)); -static void update_mapping_prot(phys_addr_t phys, unsigned long virt, - phys_addr_t size, pgprot_t prot) -{ - if (virt < PAGE_OFFSET) { - pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n", - &phys, virt); - return; - } + /* + * After the PTE entry has been populated once, we + * only allow updates to the permission attributes. 
+ */ + BUG_ON(!pgattr_change_is_safe(pte_val(old_pte), + READ_ONCE(pte_val(*ptep)))); - __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, - NO_CONT_MAPPINGS); + phys += PAGE_SIZE; + } while (ptep++, addr += PAGE_SIZE, addr != end); - /* flush the TLBs after updating live kernel mappings */ - flush_tlb_kernel_range(virt, virt + size); + pte_clear_fixmap_init(); } -static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start, - phys_addr_t end, pgprot_t prot, int flags) +static __init void iee_alloc_init_cont_pte_pre_init(pmd_t *pmdp, unsigned long addr, + unsigned long end, phys_addr_t phys, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), + int flags) { - __create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start, - prot, early_pgtable_alloc, flags); -} + unsigned long next; + pmd_t pmd = READ_ONCE(*pmdp); -void __init mark_linear_text_alias_ro(void) -{ - /* - * Remove the write permissions from the linear alias of .text/.rodata - */ - update_mapping_prot(__pa_symbol(_stext), (unsigned long)lm_alias(_stext), - (unsigned long)__init_begin - (unsigned long)_stext, - PAGE_KERNEL_RO); -} + BUG_ON(pmd_sect(pmd)); + if (pmd_none(pmd)) { + pmdval_t pmdval = PMD_TYPE_TABLE | PMD_TABLE_UXN; + phys_addr_t pte_phys; -#ifdef CONFIG_KFENCE + if (flags & NO_EXEC_MAPPINGS) + pmdval |= PMD_TABLE_PXN; + BUG_ON(!pgtable_alloc); + pte_phys = pgtable_alloc(PAGE_SHIFT); + __iee_pmd_populate_pre_init(pmdp, pte_phys, pmdval); + pmd = READ_ONCE(*pmdp); + } + BUG_ON(pmd_bad(pmd)); -bool __ro_after_init kfence_early_init = !!CONFIG_KFENCE_SAMPLE_INTERVAL; + do { + pgprot_t __prot = prot; -/* early_param() will be parsed before map_mem() below. */ -static int __init parse_kfence_early_init(char *arg) -{ - int val; + next = pte_cont_addr_end(addr, end); - if (get_option(&arg, &val)) - kfence_early_init = !!val; + /* use a contiguous mapping if the range is suitably aligned */ + if ((((addr | next | phys) & ~CONT_PTE_MASK) == 0) && + (flags & NO_CONT_MAPPINGS) == 0) + __prot = __pgprot(pgprot_val(prot) | PTE_CONT); -#if IS_ENABLED(CONFIG_KFENCE_MUST_EARLY_INIT) - kfence_must_early_init = (val == -1) ? true : false; -#endif + iee_init_pte_pre_init(pmdp, addr, next, phys, __prot); - return 0; + phys += next - addr; + } while (addr = next, addr != end); } -early_param("kfence.sample_interval", parse_kfence_early_init); -static phys_addr_t __init arm64_kfence_alloc_pool(void) +static __init void iee_init_pmd_pre_init(pud_t *pudp, unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), int flags) { - phys_addr_t kfence_pool; + unsigned long next; + pmd_t *pmdp; - if (!kfence_early_init) - return 0; + pmdp = pmd_set_fixmap_offset_init(pudp, addr); + do { + pmd_t old_pmd = READ_ONCE(*pmdp); - kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE); - if (!kfence_pool) { - pr_err("failed to allocate kfence pool\n"); - kfence_early_init = false; - return 0; - } + next = pmd_addr_end(addr, end); - /* Temporarily mark as NOMAP. */ - memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE); + /* try section mapping first */ + if (((addr | next | phys) & ~PMD_MASK) == 0 && + (flags & NO_BLOCK_MAPPINGS) == 0) { + iee_pmd_set_huge_pre_init(pmdp, phys, prot); + + /* + * After the PMD entry has been populated once, we + * only allow updates to the permission attributes. 
+ */ + BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd), + READ_ONCE(pmd_val(*pmdp)))); + } else { + iee_alloc_init_cont_pte_pre_init(pmdp, addr, next, phys, prot, + pgtable_alloc, flags); + + BUG_ON(pmd_val(old_pmd) != 0 && + pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp))); + } + phys += next - addr; + } while (pmdp++, addr = next, addr != end); + + pmd_clear_fixmap_init(); +} + +static __init void iee_alloc_init_cont_pmd_pre_init(pud_t *pudp, unsigned long addr, + unsigned long end, phys_addr_t phys, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), int flags) +{ + unsigned long next; + pud_t pud = READ_ONCE(*pudp); + + /* + * Check for initial section mappings in the pgd/pud. + */ + BUG_ON(pud_sect(pud)); + if (pud_none(pud)) { + pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_UXN; + phys_addr_t pmd_phys; + + if (flags & NO_EXEC_MAPPINGS) + pudval |= PUD_TABLE_PXN; + BUG_ON(!pgtable_alloc); + pmd_phys = pgtable_alloc(PMD_SHIFT); + __iee_pud_populate_pre_init(pudp, pmd_phys, pudval); + pud = READ_ONCE(*pudp); + } + BUG_ON(pud_bad(pud)); + + do { + pgprot_t __prot = prot; + + next = pmd_cont_addr_end(addr, end); + + /* use a contiguous mapping if the range is suitably aligned */ + if ((((addr | next | phys) & ~CONT_PMD_MASK) == 0) && + (flags & NO_CONT_MAPPINGS) == 0) + __prot = __pgprot(pgprot_val(prot) | PTE_CONT); + + iee_init_pmd_pre_init(pudp, addr, next, phys, __prot, pgtable_alloc, flags); + + phys += next - addr; + } while (addr = next, addr != end); +} + +static __init void iee_alloc_init_pud_pre_init(pgd_t *pgdp, unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), + int flags) +{ + unsigned long next; + pud_t *pudp; + p4d_t *p4dp = p4d_offset(pgdp, addr); + p4d_t p4d = READ_ONCE(*p4dp); + + if (p4d_none(p4d)) { + p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_UXN; + phys_addr_t pud_phys; + + if (flags & NO_EXEC_MAPPINGS) + p4dval |= P4D_TABLE_PXN; + BUG_ON(!pgtable_alloc); + pud_phys = pgtable_alloc(PUD_SHIFT); + __iee_p4d_populate_pre_init(p4dp, pud_phys, p4dval); + p4d = READ_ONCE(*p4dp); + } + BUG_ON(p4d_bad(p4d)); + + pudp = pud_set_fixmap_offset_init(p4dp, addr); + do { + pud_t old_pud = READ_ONCE(*pudp); + + next = pud_addr_end(addr, end); + + /* + * For 4K granule only, attempt to put down a 1GB block + */ + if (pud_sect_supported() && + ((addr | next | phys) & ~PUD_MASK) == 0 && + (flags & NO_BLOCK_MAPPINGS) == 0) { + iee_pud_set_huge_pre_init(pudp, phys, prot); + + /* + * After the PUD entry has been populated once, we + * only allow updates to the permission attributes. + */ + BUG_ON(!pgattr_change_is_safe(pud_val(old_pud), + READ_ONCE(pud_val(*pudp)))); + } else { + iee_alloc_init_cont_pmd_pre_init(pudp, addr, next, phys, prot, + pgtable_alloc, flags); + + BUG_ON(pud_val(old_pud) != 0 && + pud_val(old_pud) != READ_ONCE(pud_val(*pudp))); + } + phys += next - addr; + } while (pudp++, addr = next, addr != end); + + pud_clear_fixmap_init(); +} + +static __init void __iee_create_pgd_mapping_locked_pre_init(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), + int flags) +{ + unsigned long addr, end, next; + pgd_t *pgdp = pgd_offset_pgd(pgdir, virt); + p4d_t *p4dp; + p4d_t p4d; + + /* + * If the virtual and physical address don't have the same offset + * within a page, we cannot map the region as the caller expects. 
+ */ + if (WARN_ON((phys ^ virt) & ~PAGE_MASK)) + return; + + phys &= PAGE_MASK; + addr = virt & PAGE_MASK; + end = PAGE_ALIGN(virt + size); + + do { + next = pgd_addr_end(addr, end); + iee_alloc_init_pud_pre_init(pgdp, addr, next, phys, prot, pgtable_alloc, + flags); + p4dp = p4d_offset(pgdp, addr); + p4d = READ_ONCE(*p4dp); + __iee_p4d_populate_pre_init(p4dp, __p4d_to_phys(p4d), (PGD_APT | PUD_TYPE_TABLE)); + phys += next - addr; + } while (pgdp++, addr = next, addr != end); +} + +static __init void __iee_create_pgd_mapping_pre_init(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), + int flags) +{ + mutex_lock(&fixmap_lock); + __iee_create_pgd_mapping_locked_pre_init(pgdir, phys, virt, size, prot, + pgtable_alloc, flags); + mutex_unlock(&fixmap_lock); +} +#endif + +static phys_addr_t __pgd_pgtable_alloc(int shift) +{ + #ifdef CONFIG_PTP + unsigned long iee_addr; + #endif + void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL); + BUG_ON(!ptr); + + #ifdef CONFIG_PTP + iee_addr = __phys_to_iee(__pa(ptr)); + set_iee_page_valid(iee_addr); + iee_set_logical_mem_ro((unsigned long)ptr); + #endif + + /* Ensure the zeroed page is visible to the page table walker */ + dsb(ishst); + return __pa(ptr); +} + +static phys_addr_t pgd_pgtable_alloc(int shift) +{ + phys_addr_t pa = __pgd_pgtable_alloc(shift); + struct ptdesc *ptdesc = page_ptdesc(phys_to_page(pa)); + + /* + * Call proper page table ctor in case later we need to + * call core mm functions like apply_to_page_range() on + * this pre-allocated page table. + * + * We don't select ARCH_ENABLE_SPLIT_PMD_PTLOCK if pmd is + * folded, and if so pagetable_pte_ctor() becomes nop. + */ + if (shift == PAGE_SHIFT) + BUG_ON(!pagetable_pte_ctor(ptdesc)); + else if (shift == PMD_SHIFT) + BUG_ON(!pagetable_pmd_ctor(ptdesc)); + + return pa; +} + +/* + * This function can only be used to modify existing table entries, + * without allocating new levels of table. Note that this permits the + * creation of new section or page entries. 
+ */ +void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot) +{ + if (virt < PAGE_OFFSET) { + pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", + &phys, virt); + return; + } + + #ifdef CONFIG_PTP + __iee_create_pgd_mapping_pre_init(init_mm.pgd, phys, virt, size, prot, NULL, + NO_CONT_MAPPINGS); + #else + __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, + NO_CONT_MAPPINGS); + #endif +} + +void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, bool page_mappings_only) +{ + int flags = 0; + + BUG_ON(mm == &init_mm); + + if (page_mappings_only) + flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + + __create_pgd_mapping(mm->pgd, phys, virt, size, prot, + pgd_pgtable_alloc, flags); +} + +static void update_mapping_prot(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot) +{ + if (virt < PAGE_OFFSET) { + pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n", + &phys, virt); + return; + } + + __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, + NO_CONT_MAPPINGS); + + /* flush the TLBs after updating live kernel mappings */ + flush_tlb_kernel_range(virt, virt + size); +} + +static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start, + phys_addr_t end, pgprot_t prot, int flags) +{ + #ifdef CONFIG_PTP + __iee_create_pgd_mapping_pre_init(pgdp, start, __phys_to_virt(start), end - start, + prot, early_pgtable_alloc, flags); + #else + __create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start, + prot, early_pgtable_alloc, flags); + #endif +} + +void __init mark_linear_text_alias_ro(void) +{ + /* + * Remove the write permissions from the linear alias of .text/.rodata + */ + update_mapping_prot(__pa_symbol(_stext), (unsigned long)lm_alias(_stext), + (unsigned long)__init_begin - (unsigned long)_stext, + PAGE_KERNEL_RO); +} + +#ifdef CONFIG_KFENCE + +bool __ro_after_init kfence_early_init = !!CONFIG_KFENCE_SAMPLE_INTERVAL; + +/* early_param() will be parsed before map_mem() below. */ +static int __init parse_kfence_early_init(char *arg) +{ + int val; + + if (get_option(&arg, &val)) + kfence_early_init = !!val; + +#if IS_ENABLED(CONFIG_KFENCE_MUST_EARLY_INIT) + kfence_must_early_init = (val == -1) ? true : false; +#endif + + return 0; +} +early_param("kfence.sample_interval", parse_kfence_early_init); + +static phys_addr_t __init arm64_kfence_alloc_pool(void) +{ + phys_addr_t kfence_pool; + + if (!kfence_early_init) + return 0; + + kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE); + if (!kfence_pool) { + pr_err("failed to allocate kfence pool\n"); + kfence_early_init = false; + return 0; + } + + /* Temporarily mark as NOMAP. */ + memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE); + + return kfence_pool; +} + +static void __init arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) +{ + if (!kfence_pool) + return; + + /* KFENCE pool needs page-level mapping. 
*/ + __map_memblock(pgdp, kfence_pool, kfence_pool + KFENCE_POOL_SIZE, + pgprot_tagged(PAGE_KERNEL), + NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); + memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE); + __kfence_pool = phys_to_virt(kfence_pool); +} +#else /* CONFIG_KFENCE */ + +static inline phys_addr_t arm64_kfence_alloc_pool(void) { return 0; } +static inline void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) { } + +#endif /* CONFIG_KFENCE */ + +static void __init map_mem(pgd_t *pgdp) +{ + static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN); + phys_addr_t kernel_start = __pa_symbol(_stext); + phys_addr_t kernel_end = __pa_symbol(__init_begin); + phys_addr_t start, end; + phys_addr_t early_kfence_pool; + int flags = NO_EXEC_MAPPINGS; + u64 i; + + /* + * Setting hierarchical PXNTable attributes on table entries covering + * the linear region is only possible if it is guaranteed that no table + * entries at any level are being shared between the linear region and + * the vmalloc region. Check whether this is true for the PGD level, in + * which case it is guaranteed to be true for all other levels as well. + */ + BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end)); + + early_kfence_pool = arm64_kfence_alloc_pool(); + + if (can_set_direct_map()) + flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + + #ifdef CONFIG_IEE + flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + #endif + /* + * Take care not to create a writable alias for the + * read-only text and rodata sections of the kernel image. + * So temporarily mark them as NOMAP to skip mappings in + * the following for-loop + */ + memblock_mark_nomap(kernel_start, kernel_end - kernel_start); + + /* map all the memory banks */ + for_each_mem_range(i, &start, &end) { + if (start >= end) + break; + /* + * The linear map must allow allocation tags reading/writing + * if MTE is present. Otherwise, it has the same attributes as + * PAGE_KERNEL. + */ + __map_memblock(pgdp, start, end, pgprot_tagged(PAGE_KERNEL), + flags); + } - return kfence_pool; + /* + * Map the linear alias of the [_stext, __init_begin) interval + * as non-executable now, and remove the write permission in + * mark_linear_text_alias_ro() below (which will be called after + * alternative patching has completed). This makes the contents + * of the region accessible to subsystems such as hibernate, + * but protects it from inadvertent modification or execution. + * Note that contiguous mappings cannot be remapped in this way, + * so we should avoid them here. + */ + #ifdef CONFIG_IEE + __map_memblock(pgdp, kernel_start, kernel_end, + PAGE_KERNEL, flags); + #else + __map_memblock(pgdp, kernel_start, kernel_end, + PAGE_KERNEL, NO_CONT_MAPPINGS); + #endif + memblock_clear_nomap(kernel_start, kernel_end - kernel_start); + arm64_kfence_map_pool(early_kfence_pool, pgdp); +} + +void mark_rodata_ro(void) +{ + unsigned long section_size; + + /* + * mark .rodata as read only. Use __init_begin rather than __end_rodata + * to cover NOTES and EXCEPTION_TABLE. 
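map_mem() above unconditionally ORs NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS into flags under CONFIG_IEE, where the non-IEE path only does so when can_set_direct_map(). The reason is granularity: a block mapping covers a whole PMD-sized region with a single entry, so flipping one page between writable and read-only would mean splitting a live block, while page mappings can be retargeted one entry at a time. A back-of-the-envelope comparison, assuming 4 KiB pages and 2 MiB blocks:

/* Why CONFIG_IEE forces page-granular linear mappings: a 2 MiB block
 * is one PMD entry, so per-page permission changes would require a
 * live block split. Plain arithmetic demo with assumed sizes. */
#include <stdio.h>

int main(void)
{
    unsigned long region = 128UL << 20;           /* 128 MiB of linear map */
    unsigned long page = 4096, block = 2UL << 20; /* PTE vs PMD granularity */

    printf("block entries: %lu (one per 2 MiB, not individually protectable)\n",
           region / block);
    printf("page  entries: %lu (one per 4 KiB, individually protectable)\n",
           region / page);
    return 0;
}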
+ */ + section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata; + update_mapping_prot(__pa_symbol(__start_rodata), (unsigned long)__start_rodata, + section_size, PAGE_KERNEL_RO); + + debug_checkwx(); +} + +static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end, + pgprot_t prot, struct vm_struct *vma, + int flags, unsigned long vm_flags) +{ + phys_addr_t pa_start = __pa_symbol(va_start); + unsigned long size = va_end - va_start; + + BUG_ON(!PAGE_ALIGNED(pa_start)); + BUG_ON(!PAGE_ALIGNED(size)); + + #ifdef CONFIG_PTP + __iee_create_pgd_mapping_pre_init(pgdp, pa_start, (unsigned long)va_start, size, prot, + early_pgtable_alloc, flags); + #else + __create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot, + early_pgtable_alloc, flags); + #endif + + if (!(vm_flags & VM_NO_GUARD)) + size += PAGE_SIZE; + + vma->addr = va_start; + vma->phys_addr = pa_start; + vma->size = size; + vma->flags = VM_MAP | vm_flags; + vma->caller = __builtin_return_address(0); + + vm_area_add_early(vma); +} + +static pgprot_t kernel_exec_prot(void) +{ + return rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; +} + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +static int __init map_entry_trampoline(void) +{ + int i; + + pgprot_t prot = kernel_exec_prot(); + phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start); + + /* The trampoline is always mapped and can therefore be global */ + pgprot_val(prot) &= ~PTE_NG; + + /* Map only the text into the trampoline page table */ + memset(tramp_pg_dir, 0, PGD_SIZE); + #ifdef CONFIG_PTP + iee_set_logical_mem_ro((unsigned long)tramp_pg_dir); + #endif + __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, + entry_tramp_text_size(), prot, + __pgd_pgtable_alloc, NO_BLOCK_MAPPINGS); + + /* Map both the text and data into the kernel page table */ + for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++) + __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, + pa_start + i * PAGE_SIZE, prot); + + if (IS_ENABLED(CONFIG_RELOCATABLE)) + __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, + pa_start + i * PAGE_SIZE, PAGE_KERNEL_RO); + + return 0; +} +core_initcall(map_entry_trampoline); +#endif + +/* + * Open coded check for BTI, only for use to determine configuration + * for early mappings for before the cpufeature code has run. + */ +static bool arm64_early_this_cpu_has_bti(void) +{ + u64 pfr1; + + if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) + return false; + + pfr1 = __read_sysreg_by_encoding(SYS_ID_AA64PFR1_EL1); + return cpuid_feature_extract_unsigned_field(pfr1, + ID_AA64PFR1_EL1_BT_SHIFT); +} + +#ifdef CONFIG_IEE +/* Set PMD APTable of iee si codes as (1,1) to revert it to ROX P pages when HPD1=0. 
*/ +static void __init iee_si_set_pmd_APtable(unsigned long addr, pgd_t *pgdir) +{ + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + + p4d_t *p4dp = p4d_offset(pgdp, addr); + + #ifdef CONFIG_PTP + pud_t *pudp = pud_set_fixmap_offset_init(p4dp, addr); + + pmd_t *pmdp = pmd_set_fixmap_offset_init(pudp, addr); + + pmd_t pmd = READ_ONCE(*pmdp); + + __iee_pmd_populate_pre_init(pmdp, __pmd_to_phys(pmd), PGD_APT_RO | PGD_APT | PMD_TYPE_TABLE); + + pud_clear_fixmap_init(); + pmd_clear_fixmap_init(); + #else + pud_t *pudp = pud_set_fixmap_offset(p4dp, addr); + + pmd_t *pmdp = pmd_set_fixmap_offset(pudp, addr); + + pmd_t pmd = READ_ONCE(*pmdp); + + __pmd_populate(pmdp, __pmd_to_phys(pmd), PGD_APT_RO | PGD_APT | PMD_TYPE_TABLE); + + pud_clear_fixmap(); + pmd_clear_fixmap(); + #endif +} +/* Set PMD APTable of iee si codes as (1,1) to revert it to ROX P pages when HPD1=0. */ +static void __init mark_iee_si_pmd_APtable(pgd_t *pgdir) +{ + unsigned long addr = (unsigned long)__iee_si_start; + iee_si_set_pmd_APtable(addr, pgdir); + // iee rwx gate exit may be mapped by another pmd. + iee_si_set_pmd_APtable(addr + PAGE_SIZE, pgdir); +} +#endif + +/* + * Create fine-grained mappings for the kernel. + */ +static void __init map_kernel(pgd_t *pgdp) +{ + static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext, + vmlinux_initdata, vmlinux_data; + + #ifdef CONFIG_IEE + static struct vm_struct vmlinux_iee_code, vmlinux_iee_data, vmlinux_iee_gate, vmlinux_text_end; + #endif + + /* + * External debuggers may need to write directly to the text + * mapping to install SW breakpoints. Allow this (only) when + * explicitly requested with rodata=off. + */ + pgprot_t text_prot = kernel_exec_prot(); + + /* + * If we have a CPU that supports BTI and a kernel built for + * BTI then mark the kernel executable text as guarded pages + * now so we don't have to rewrite the page tables later. + */ + if (arm64_early_this_cpu_has_bti()) + text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP); + + /* + * Only rodata will be remapped with different permissions later on, + * all other segments are allowed to use contiguous mappings. + */ + #ifdef CONFIG_IEE + map_kernel_segment(pgdp, _stext, __iee_si_data_start, text_prot, &vmlinux_text, + 0, VM_NO_GUARD); + /* Set iee si data RW. */ + map_kernel_segment(pgdp, __iee_si_data_start, __iee_exec_entry_start, SET_NG(PAGE_KERNEL), + &vmlinux_iee_data, NO_CONT_MAPPINGS | NO_BLOCK_MAPPINGS, VM_NO_GUARD); + /* Set iee entry codes NG. */ + map_kernel_segment(pgdp, __iee_exec_entry_start, __iee_si_start, SET_NG(text_prot), &vmlinux_iee_gate, + NO_CONT_MAPPINGS | NO_BLOCK_MAPPINGS, VM_NO_GUARD); + /* Map __iee_si_start - __iee_si_end as U RWX pages and set PMD APTABLE = (1,1). 
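The (1,1) APTable encoding referred to here is the hierarchical permission mechanism: APTable[1] forbids writes at every level below the table entry, APTable[0] forbids EL0 access, and both take effect only while TCR_EL1.HPD1 = 0. Since paging_init() later runs the kernel with HPD1 = 1, the iee-si leaves normally present as user RWX pages that PAN hides from the kernel; the rwx gate can clear HPD1 so the same subtree reads as privileged ROX. A small model of the two views follows; the bit positions are from the Arm ARM, the descriptor value is invented for the demo.

/* Model of the APTable/HPD1 switch: bits 62:61 of a table descriptor
 * (APTable) govern the whole subtree, but only while TCR_EL1.HPD1=0;
 * with HPD1=1 they are ignored and the leaf attributes win. */
#include <stdint.h>
#include <stdio.h>

#define APTABLE_SHIFT     61
#define APTABLE_RO_NO_EL0 (3ULL << APTABLE_SHIFT) /* (1,1): no write, no EL0 */
#define TCR_HPD1          (1ULL << 42)

static const char *effective_view(uint64_t table_desc, uint64_t tcr)
{
    if (!(tcr & TCR_HPD1) && (table_desc & APTABLE_RO_NO_EL0))
        return "privileged read-only/exec (gate view)";
    return "leaf permissions, user RWX (kernel view, blocked by PAN)";
}

int main(void)
{
    uint64_t desc = 0x40000000ULL | 3;  /* fake next-level table entry */

    desc |= APTABLE_RO_NO_EL0;
    printf("HPD1=1: %s\n", effective_view(desc, TCR_HPD1));
    printf("HPD1=0: %s\n", effective_view(desc, 0));
    return 0;
}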
*/ + map_kernel_segment(pgdp, __iee_si_start, __iee_si_end, SET_NG((PAGE_KERNEL_EXEC)), + &vmlinux_iee_code, NO_CONT_MAPPINGS | NO_BLOCK_MAPPINGS, VM_NO_GUARD); + mark_iee_si_pmd_APtable(pgdp); + + map_kernel_segment(pgdp, __iee_si_end, _etext, text_prot, &vmlinux_text_end, 0, + VM_NO_GUARD); + + map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL, + &vmlinux_rodata, NO_CONT_MAPPINGS | NO_BLOCK_MAPPINGS, VM_NO_GUARD); + map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot, + &vmlinux_inittext, 0, VM_NO_GUARD); + map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL, + &vmlinux_initdata, 0, VM_NO_GUARD); + map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, NO_CONT_MAPPINGS | NO_BLOCK_MAPPINGS, 0); + #else + map_kernel_segment(pgdp, _stext, _etext, text_prot, &vmlinux_text, 0, + VM_NO_GUARD); + map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL, + &vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD); + map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot, + &vmlinux_inittext, 0, VM_NO_GUARD); + map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL, + &vmlinux_initdata, 0, VM_NO_GUARD); + map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0); + #endif + + + fixmap_copy(pgdp); + kasan_copy_shadow(pgdp); +} + +static void __init create_idmap(void) +{ + u64 start = __pa_symbol(__idmap_text_start); + u64 size = __pa_symbol(__idmap_text_end) - start; + pgd_t *pgd = idmap_pg_dir; + u64 pgd_phys; + + /* check if we need an additional level of translation */ + if (VA_BITS < 48 && idmap_t0sz < (64 - VA_BITS_MIN)) { + pgd_phys = early_pgtable_alloc(PAGE_SHIFT); + set_pgd(&idmap_pg_dir[start >> VA_BITS], + __pgd(pgd_phys | P4D_TYPE_TABLE)); + pgd = __va(pgd_phys); + } + #ifdef CONFIG_PTP + __iee_create_pgd_mapping_pre_init(pgd, start, start, size, PAGE_KERNEL_ROX, + early_pgtable_alloc, 0); + #else + __create_pgd_mapping(pgd, start, start, size, PAGE_KERNEL_ROX, + early_pgtable_alloc, 0); + #endif + + if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) { + extern u32 __idmap_kpti_flag; + u64 pa = __pa_symbol(&__idmap_kpti_flag); + + /* + * The KPTI G-to-nG conversion code needs a read-write mapping + * of its synchronization flag in the ID map. + */ + #ifdef CONFIG_PTP + __iee_create_pgd_mapping_pre_init(pgd, pa, pa, sizeof(u32), PAGE_KERNEL, + early_pgtable_alloc, 0); + #else + __create_pgd_mapping(pgd, pa, pa, sizeof(u32), PAGE_KERNEL, + early_pgtable_alloc, 0); + #endif + } +} + +#ifdef CONFIG_IEE +static void __create_pgd_mapping_for_iee_locked(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), + int flags) +{ + unsigned long addr, end, next; + pgd_t *pgdp = pgd_offset_pgd(pgdir, virt); + p4d_t *p4dp; + p4d_t p4d; + + /* + * If the virtual and physical address don't have the same offset + * within a page, we cannot map the region as the caller expects. 
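The check this comment describes is a compact XOR idiom: if phys and virt differ anywhere in their sub-page bits, those differences survive the XOR and intersect ~PAGE_MASK, so mapping the region would misplace every byte by the offset delta. A standalone rendering of the test, assuming 4 KiB pages:

/* The WARN_ON below uses XOR to verify that phys and virt share the
 * same offset within a page: any differing low bits survive the XOR
 * and land under ~PAGE_MASK. */
#include <stdio.h>

#define PAGE_MASK (~4095UL)

static int same_page_offset(unsigned long phys, unsigned long virt)
{
    return ((phys ^ virt) & ~PAGE_MASK) == 0;
}

int main(void)
{
    printf("%d\n", same_page_offset(0x40001234UL, 0xffff000000001234UL)); /* 1 */
    printf("%d\n", same_page_offset(0x40001234UL, 0xffff000000001238UL)); /* 0 */
    return 0;
}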
+ */ + if (WARN_ON((phys ^ virt) & ~PAGE_MASK)) + return; + + phys &= PAGE_MASK; + addr = virt & PAGE_MASK; + end = PAGE_ALIGN(virt + size); + + do { + next = pgd_addr_end(addr, end); + #ifdef CONFIG_PTP + iee_alloc_init_pud_pre_init(pgdp, addr, next, phys, prot, pgtable_alloc, + flags); + #else + alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc, + flags); + #endif + p4dp = p4d_offset(pgdp, addr); + p4d = READ_ONCE(*p4dp); + #ifdef CONFIG_PTP + __iee_p4d_populate_pre_init(p4dp, __p4d_to_phys(p4d), (PGD_APT | PGD_PXN | PGD_UXN | PUD_TYPE_TABLE)); + #else + __p4d_populate(p4dp, __p4d_to_phys(p4d), (PGD_APT | PGD_PXN | PGD_UXN | PUD_TYPE_TABLE)); + #endif + phys += next - addr; + } while (pgdp++, addr = next, addr != end); +} + +static void __create_pgd_mapping_for_iee(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), + int flags) +{ + mutex_lock(&fixmap_lock); + __create_pgd_mapping_for_iee_locked(pgdir, phys, virt, size, prot, + pgtable_alloc, flags); + mutex_unlock(&fixmap_lock); } -static void __init arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) +static void __init __map_memblock_for_iee(pgd_t *pgdp, phys_addr_t start, + phys_addr_t end, pgprot_t prot, int flags) { - if (!kfence_pool) - return; - - /* KFENCE pool needs page-level mapping. */ - __map_memblock(pgdp, kfence_pool, kfence_pool + KFENCE_POOL_SIZE, - pgprot_tagged(PAGE_KERNEL), - NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); - memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE); - __kfence_pool = phys_to_virt(kfence_pool); + #ifdef CONFIG_PTP + __create_pgd_mapping_for_iee(pgdp, start, __phys_to_iee(start), end - start, + prot, early_pgtable_alloc, flags); + #else + __create_pgd_mapping_for_iee(pgdp, start, __phys_to_iee(start), end - start, + prot, early_pgtable_alloc, flags); + #endif } -#else /* CONFIG_KFENCE */ -static inline phys_addr_t arm64_kfence_alloc_pool(void) { return 0; } -static inline void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) { } - -#endif /* CONFIG_KFENCE */ - -static void __init map_mem(pgd_t *pgdp) +static void __init map_iee(pgd_t *pgdp) { static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN); phys_addr_t kernel_start = __pa_symbol(_stext); @@ -578,6 +1578,8 @@ static void __init map_mem(pgd_t *pgdp) int flags = NO_EXEC_MAPPINGS; u64 i; + flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + /* * Setting hierarchical PXNTable attributes on table entries covering * the linear region is only possible if it is guaranteed that no table @@ -589,9 +1591,6 @@ static void __init map_mem(pgd_t *pgdp) early_kfence_pool = arm64_kfence_alloc_pool(); - if (can_set_direct_map()) - flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; - /* * Take care not to create a writable alias for the * read-only text and rodata sections of the kernel image. @@ -609,12 +1608,11 @@ static void __init map_mem(pgd_t *pgdp) * if MTE is present. Otherwise, it has the same attributes as * PAGE_KERNEL. */ - __map_memblock(pgdp, start, end, pgprot_tagged(PAGE_KERNEL), - flags); + __map_memblock_for_iee(pgdp, start, end, SET_NG(SET_INVALID(SET_UPAGE(PAGE_KERNEL))), flags); } /* - * Map the linear alias of the [_stext, __init_begin) interval + * Map the linear alias of the [_text, __init_begin) interval * as non-executable now, and remove the write permission in * mark_linear_text_alias_ro() below (which will be called after * alternative patching has completed). 
This makes the contents @@ -623,178 +1621,384 @@ static void __init map_mem(pgd_t *pgdp) * Note that contiguous mappings cannot be remapped in this way, * so we should avoid them here. */ - __map_memblock(pgdp, kernel_start, kernel_end, - PAGE_KERNEL, NO_CONT_MAPPINGS); + __map_memblock_for_iee(pgdp, kernel_start, kernel_end, + SET_NG(SET_INVALID(SET_UPAGE(PAGE_KERNEL))), flags); memblock_clear_nomap(kernel_start, kernel_end - kernel_start); arm64_kfence_map_pool(early_kfence_pool, pgdp); } -void mark_rodata_ro(void) +/* + * Change page access permissions, without handling huge pages. + * Only used by IEE init functions. + */ +static void __init iee_si_set_page_attr(unsigned long addr, pteval_t attr) { - unsigned long section_size; + unsigned long flag; + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); - /* - * mark .rodata as read only. Use __init_begin rather than __end_rodata - * to cover NOTES and EXCEPTION_TABLE. - */ - section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata; - update_mapping_prot(__pa_symbol(__start_rodata), (unsigned long)__start_rodata, - section_size, PAGE_KERNEL_RO); + p4d_t *p4dp = p4d_offset(pgdp, addr); - debug_checkwx(); + pud_t *pudp = pud_offset(p4dp, addr); + + pmd_t *pmdp = pmd_offset(pudp, addr); + + pte_t *ptep = pte_offset_kernel(pmdp, addr); + pte_t pte = READ_ONCE(*ptep); + + if(attr & PTE_RDONLY) + pte = __pte((pte_val(pte) | PTE_RDONLY) & ~PTE_DBM); + pte = __pte(pte_val(pte) | attr); + #ifdef CONFIG_PTP + // Write the pgtable through the IEE alias directly. + flag = local_daif_save(); + asm volatile ("msr pan, #0"); + WRITE_ONCE(*((pte_t *)(__phys_to_iee(__pa(ptep)))), pte); + asm volatile ("msr pan, #1"); + local_daif_restore(flag); + #else + WRITE_ONCE(*ptep, pte); + #endif +} + +/* Prepare the data used by the iee rwx gates. This data is set only once. */ +void __init iee_si_prepare_data(void) +{ + unsigned long va; + // Record the current TCR value after system init. + iee_si_tcr = read_sysreg(tcr_el1) & ~(SYS_TCR_IEE_SI); + // Mark iee data as RO and move it into IEE after setup. + va = (unsigned long)__iee_si_data_start; + iee_si_set_page_attr(va, PTE_RDONLY); + iee_si_set_page_attr(lm_alias(va)+iee_offset, 0x1 | PTE_RDONLY); + // Set the iee sensitive-instruction code pages U RWX here to hide them from the kernel. + va = (unsigned long)__iee_si_start; + iee_si_set_page_attr(va, PTE_USER); + va = (unsigned long)__iee_si_start + PAGE_SIZE; + iee_si_set_page_attr(va, PTE_USER); + flush_tlb_all(); } -static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end, - pgprot_t prot, struct vm_struct *vma, - int flags, unsigned long vm_flags) +#endif + +#ifdef CONFIG_PTP +// Attention: uses set_xxx without adding the IEE offset.
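The CONFIG_PTP branch of iee_si_set_page_attr() above is the canonical shape of a pre-init page-table write in this patch: interrupts masked via local_daif_save(), PAN cleared so privileged code may touch the user-typed IEE alias, a single WRITE_ONCE through __phys_to_iee(), then PAN and DAIF restored. A user-space model of the sequence, with stub functions standing in for the privileged operations:

/* Model of the guarded pte write: irqs off, PAN off, one store
 * through the IEE alias, state restored. The pan/daif helpers are
 * stand-ins for the privileged instructions. */
#include <stdint.h>
#include <stdio.h>

static unsigned long daif_save(void)       { puts("irqs off"); return 1; }
static void daif_restore(unsigned long f)  { (void)f; puts("irqs on"); }
static void pan_set(int on)                { printf("PAN=%d\n", on); }

static void guarded_pte_write(uint64_t *iee_alias_ptep, uint64_t pte)
{
    unsigned long flags = daif_save();
    pan_set(0);                 /* allow privileged access to the user-typed alias */
    *iee_alias_ptep = pte;      /* the single store through the alias */
    pan_set(1);
    daif_restore(flags);
}

int main(void)
{
    uint64_t pte = 0;
    guarded_pte_write(&pte, 0x0040000000000703ULL); /* arbitrary demo value */
    printf("pte=%#llx\n", (unsigned long long)pte);
    return 0;
}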
+static void __init set_iee_valid_pre_init(unsigned long addr) { - phys_addr_t pa_start = __pa_symbol(va_start); - unsigned long size = va_end - va_start; + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); - BUG_ON(!PAGE_ALIGNED(pa_start)); - BUG_ON(!PAGE_ALIGNED(size)); + p4d_t *p4dp = p4d_offset(pgdp, addr); - __create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot, - early_pgtable_alloc, flags); + pud_t *pudp = pud_offset(p4dp, addr); - if (!(vm_flags & VM_NO_GUARD)) - size += PAGE_SIZE; + pmd_t *pmdp = pmd_offset(pudp, addr); - vma->addr = va_start; - vma->phys_addr = pa_start; - vma->size = size; - vma->flags = VM_MAP | vm_flags; - vma->caller = __builtin_return_address(0); + pte_t *ptep = pte_offset_kernel(pmdp, addr); + pte_t pte = READ_ONCE(*ptep); - vm_area_add_early(vma); + if((addr < (PAGE_OFFSET + IEE_OFFSET)) | (addr > (PAGE_OFFSET + BIT(vabits_actual - 1)))) + return; + + pte = __pte(pte_val(pte) | 0x1); + iee_set_pte_pre_init(ptep, pte); + flush_tlb_kernel_range(addr, addr+PAGE_SIZE); + isb(); } -static pgprot_t kernel_exec_prot(void) +static void __init move_pte_table_into_iee(pmd_t *pmdp, unsigned long addr, unsigned long end) { - return rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; + pmd_t pmd = READ_ONCE(*pmdp); + unsigned long iee_addr = __phys_to_iee(__pmd_to_phys(pmd)); + set_iee_valid_pre_init(iee_addr); } -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -static int __init map_entry_trampoline(void) +static void __init move_pmd_table_into_iee(pud_t *pudp, unsigned long addr, unsigned long end) { - int i; + unsigned long next; + pud_t pud = READ_ONCE(*pudp); + pmd_t *pmdp; + pmd_t pmd; + unsigned long iee_addr = __phys_to_iee(__pud_to_phys(pud)); + set_iee_valid_pre_init(iee_addr); - pgprot_t prot = kernel_exec_prot(); - phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start); + pmdp = pmd_offset(pudp, addr); + do { + next = pmd_addr_end(addr, end); + pmd = READ_ONCE(*pmdp); + if((pmd_val(pmd) & PMD_TABLE_BIT) == 0) + { + continue; + } + else + { + move_pte_table_into_iee(pmdp, addr, next); + } + } while (pmdp++, addr = next, addr != end); +} - /* The trampoline is always mapped and can therefore be global */ - pgprot_val(prot) &= ~PTE_NG; +static void __init move_pud_table_into_iee(pgd_t *pgdp, unsigned long addr, unsigned long end) +{ + unsigned long next; + p4d_t *p4dp = p4d_offset(pgdp, addr); + p4d_t p4d = READ_ONCE(*p4dp); + pud_t *pudp; + pud_t pud; + unsigned long iee_addr = __phys_to_iee(__p4d_to_phys(p4d)); + set_iee_valid_pre_init(iee_addr); - /* Map only the text into the trampoline page table */ - memset(tramp_pg_dir, 0, PGD_SIZE); - __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, - entry_tramp_text_size(), prot, - __pgd_pgtable_alloc, NO_BLOCK_MAPPINGS); + pudp = pud_offset(p4dp, addr); + do { + next = pud_addr_end(addr, end); + pud = READ_ONCE(*pudp); + if ((pud_val(pud) & PUD_TABLE_BIT) == 0) + { + continue; + } + else + { + move_pmd_table_into_iee(pudp, addr, next); + } + } while (pudp++, addr = next, addr != end); +} - /* Map both the text and data into the kernel page table */ - for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++) - __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, - pa_start + i * PAGE_SIZE, prot); +static void __init init_iee_for_one_region(pgd_t *pgdir, unsigned long va_start, unsigned long va_end) +{ + unsigned long addr, end, next; + pgd_t *pgdp = pgd_offset_pgd(pgdir, va_start); - if (IS_ENABLED(CONFIG_RELOCATABLE)) - __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, - 
pa_start + i * PAGE_SIZE, PAGE_KERNEL_RO); + addr = va_start & PAGE_MASK; + end = PAGE_ALIGN(va_end); - return 0; + do { + next = pgd_addr_end(addr, end); + move_pud_table_into_iee(pgdp, addr, next); + } while (pgdp++, addr = next, addr != end); } -core_initcall(map_entry_trampoline); -#endif -/* - * Open coded check for BTI, only for use to determine configuration - * for early mappings for before the cpufeature code has run. - */ -static bool arm64_early_this_cpu_has_bti(void) +static void __init init_iee(void) { - u64 pfr1; + unsigned long iee_addr; + phys_addr_t start, end; + u64 i; + pgd_t *pgdp; + + #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + // handling 1-level tramp page table tramp_pg_dir + iee_addr = (unsigned long)__phys_to_iee(__pa_symbol(tramp_pg_dir)); + set_iee_valid_pre_init(iee_addr); + #endif + // handling 1-level page table swapper_pg_dir + pgdp = swapper_pg_dir; + iee_addr = (unsigned long)__phys_to_iee(__pa_symbol(swapper_pg_dir)); + set_iee_valid_pre_init(iee_addr); + // handling 2/3/4-level page table for kernel + init_iee_for_one_region(pgdp, (unsigned long)_text, (unsigned long)_etext); + init_iee_for_one_region(pgdp, (unsigned long)__start_rodata, (unsigned long)__inittext_begin); + init_iee_for_one_region(pgdp, (unsigned long)__inittext_begin, (unsigned long)__inittext_end); + init_iee_for_one_region(pgdp, (unsigned long)__initdata_begin, (unsigned long)__initdata_end); + init_iee_for_one_region(pgdp, (unsigned long)_data, (unsigned long)_end); + // handling 2/3/4-level page table for fixmap i.e. remap bm_xxx + iee_addr = (unsigned long)__phys_to_iee(__pa_symbol(bm_pte_addr)); + set_iee_valid_pre_init(iee_addr); + iee_addr = (unsigned long)__phys_to_iee(__pa_symbol(bm_pmd_addr)); + set_iee_valid_pre_init(iee_addr); + iee_addr = (unsigned long)__phys_to_iee(__pa_symbol(bm_pud_addr)); + set_iee_valid_pre_init(iee_addr); + // handling 2/3/4-level page table for logical mem and iee + for_each_mem_range(i, &start, &end) { + if (start >= end) + break; + /* + * The linear map must allow allocation tags reading/writing + * if MTE is present. Otherwise, it has the same attributes as + * PAGE_KERNEL. + */ + init_iee_for_one_region(pgdp, (unsigned long)__va(start), (unsigned long)__va(end)); + init_iee_for_one_region(pgdp, (unsigned long)__phys_to_iee(start), (unsigned long)__phys_to_iee(end)); + } +} - if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) - return false; +static void iee_set_kernel_upage_pre_init(unsigned long addr) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); - pfr1 = __read_sysreg_by_encoding(SYS_ID_AA64PFR1_EL1); - return cpuid_feature_extract_unsigned_field(pfr1, - ID_AA64PFR1_EL1_BT_SHIFT); + p4d_t *p4dp = p4d_offset(pgdp, addr); + p4d_t p4d = READ_ONCE(*p4dp); + + pud_t *pudp = pud_offset(p4dp, addr); + + pmd_t *pmdp = pmd_offset(pudp, addr); + + pte_t *ptep = pte_offset_kernel(pmdp, addr); + + int i; + for(i = 0; i < 4; i++) + { + pte_t pte = READ_ONCE(*ptep); + pte = __pte(pte_val(pte) | PTE_USER | PTE_NG); + iee_set_pte_pre_init(ptep, pte); + ptep++; + } + flush_tlb_kernel_range(addr, addr+4*PAGE_SIZE); + isb(); } -/* - * Create fine-grained mappings for the kernel. 
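iee_set_kernel_upage_pre_init() above hard-codes a walk over four consecutive pte slots, matching the four-page init IEE stack it is applied to in paging_init(), and sets PTE_USER plus PTE_NG on each so the stack is reachable only through the unprivileged IEE window. The bit-setting core reduces to the following; the bit positions are arm64's, the pte values are invented:

/* Model of the four-page user/not-global marking: OR PTE_USER and
 * PTE_NG into consecutive pte slots. */
#include <stdint.h>
#include <stdio.h>

#define PTE_USER (1ULL << 6)   /* AP[1]: EL0 accessible */
#define PTE_NG   (1ULL << 11)  /* not-global: per-ASID TLB entries */

int main(void)
{
    uint64_t ptes[4] = { 0x703, 0x1703, 0x2703, 0x3703 }; /* fake ptes */

    for (int i = 0; i < 4; i++)
        ptes[i] |= PTE_USER | PTE_NG;

    for (int i = 0; i < 4; i++)
        printf("pte[%d] = %#llx\n", i, (unsigned long long)ptes[i]);
    return 0;
}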
- */ -static void __init map_kernel(pgd_t *pgdp) +static void __init iee_set_pte_table_ro(pmd_t *pmdp, unsigned long addr, unsigned long end) { - static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext, - vmlinux_initdata, vmlinux_data; + pmd_t pmd = READ_ONCE(*pmdp); + unsigned long logical_addr = (unsigned long)__va(__pmd_to_phys(pmd)); + iee_set_logical_mem_ro(logical_addr); +} - /* - * External debuggers may need to write directly to the text - * mapping to install SW breakpoints. Allow this (only) when - * explicitly requested with rodata=off. - */ - pgprot_t text_prot = kernel_exec_prot(); +static void __init iee_set_pmd_table_ro(pud_t *pudp, unsigned long addr, unsigned long end) +{ + unsigned long next; + pud_t pud = READ_ONCE(*pudp); + pmd_t *pmdp; + pmd_t pmd; + unsigned long logical_addr = (unsigned long)__va(__pud_to_phys(pud)); + iee_set_logical_mem_ro(logical_addr); - /* - * If we have a CPU that supports BTI and a kernel built for - * BTI then mark the kernel executable text as guarded pages - * now so we don't have to rewrite the page tables later. - */ - if (arm64_early_this_cpu_has_bti()) - text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP); + pmdp = pmd_offset(pudp, addr); + do { + next = pmd_addr_end(addr, end); + pmd = READ_ONCE(*pmdp); + if((pmd_val(pmd) & PMD_TABLE_BIT) == 0) + { + continue; + } + else + { + iee_set_pte_table_ro(pmdp, addr, next); + } + } while (pmdp++, addr = next, addr != end); +} - /* - * Only rodata will be remapped with different permissions later on, - * all other segments are allowed to use contiguous mappings. - */ - map_kernel_segment(pgdp, _stext, _etext, text_prot, &vmlinux_text, 0, - VM_NO_GUARD); - map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL, - &vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD); - map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot, - &vmlinux_inittext, 0, VM_NO_GUARD); - map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL, - &vmlinux_initdata, 0, VM_NO_GUARD); - map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0); +static void __init iee_set_pud_table_ro(pgd_t *pgdp, unsigned long addr, unsigned long end) +{ + unsigned long next; + p4d_t *p4dp = p4d_offset(pgdp, addr); + p4d_t p4d = READ_ONCE(*p4dp); + pud_t *pudp; + pud_t pud; + unsigned long logical_addr = (unsigned long)__va(__p4d_to_phys(p4d)); + iee_set_logical_mem_ro(logical_addr); - fixmap_copy(pgdp); - kasan_copy_shadow(pgdp); + pudp = pud_offset(p4dp, addr); + do { + next = pud_addr_end(addr, end); + pud = READ_ONCE(*pudp); + if ((pud_val(pud) & PUD_TABLE_BIT) == 0) + { + continue; + } + else + { + iee_set_pmd_table_ro(pudp, addr, next); + } + } while (pudp++, addr = next, addr != end); } -static void __init create_idmap(void) +static void __init iee_mark_pgtable_for_one_region_ro(pgd_t *pgdir, unsigned long va_start, unsigned long va_end) { - u64 start = __pa_symbol(__idmap_text_start); - u64 size = __pa_symbol(__idmap_text_end) - start; - pgd_t *pgd = idmap_pg_dir; - u64 pgd_phys; + unsigned long addr, end, next; + pgd_t *pgdp = pgd_offset_pgd(pgdir, va_start); - /* check if we need an additional level of translation */ - if (VA_BITS < 48 && idmap_t0sz < (64 - VA_BITS_MIN)) { - pgd_phys = early_pgtable_alloc(PAGE_SHIFT); - set_pgd(&idmap_pg_dir[start >> VA_BITS], - __pgd(pgd_phys | P4D_TYPE_TABLE)); - pgd = __va(pgd_phys); - } - __create_pgd_mapping(pgd, start, start, size, PAGE_KERNEL_ROX, - early_pgtable_alloc, 0); + addr = va_start & PAGE_MASK; + end = 
PAGE_ALIGN(va_end); - if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) { - extern u32 __idmap_kpti_flag; - u64 pa = __pa_symbol(&__idmap_kpti_flag); + do { + next = pgd_addr_end(addr, end); + iee_set_pud_table_ro(pgdp, addr, next); + } while (pgdp++, addr = next, addr != end); +} +static void __init iee_mark_all_lm_pgtable_ro(void) +{ + unsigned long logical_addr; + phys_addr_t start, end; + u64 i; + pgd_t *pgdp; + + // handling static allocated page table + #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + // handling 1-level tramp page table tramp_pg_dir + logical_addr = (unsigned long)__va(__pa_symbol(tramp_pg_dir)); + iee_set_logical_mem_ro(logical_addr); + #endif + // handling 1-level page table swapper_pg_dir + pgdp = swapper_pg_dir; + iee_set_logical_mem_ro((unsigned long)swapper_pg_dir); + logical_addr = (unsigned long)__va(__pa_symbol(swapper_pg_dir)); + iee_set_logical_mem_ro(logical_addr); + + // handling 2/3/4-level page table for kernel + iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)_text, (unsigned long)_etext); + iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)__start_rodata, (unsigned long)__inittext_begin); + iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)__inittext_begin, (unsigned long)__inittext_end); + iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)__initdata_begin, (unsigned long)__initdata_end); + iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)_data, (unsigned long)_end); + + // handling 2/3/4-level page table for fixmap i.e. remap bm_xxx + logical_addr = (unsigned long)__va(__pa_symbol(bm_pte_addr)); + iee_set_logical_mem_ro(logical_addr); + + iee_set_logical_mem_ro((unsigned long)bm_pmd_addr); + logical_addr = (unsigned long)__va(__pa_symbol(bm_pmd_addr)); + iee_set_logical_mem_ro(logical_addr); + + iee_set_logical_mem_ro((unsigned long)bm_pud_addr); + logical_addr = (unsigned long)__va(__pa_symbol(bm_pud_addr)); + iee_set_logical_mem_ro(logical_addr); + + // handling 2/3/4-level page table for logical mem and iee + for_each_mem_range(i, &start, &end) { + if (start >= end) + break; /* - * The KPTI G-to-nG conversion code needs a read-write mapping - * of its synchronization flag in the ID map. + * The linear map must allow allocation tags reading/writing + * if MTE is present. Otherwise, it has the same attributes as + * PAGE_KERNEL. */ - __create_pgd_mapping(pgd, pa, pa, sizeof(u32), PAGE_KERNEL, - early_pgtable_alloc, 0); + iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)__va(start), (unsigned long)__va(end)); + iee_mark_pgtable_for_one_region_ro(pgdp, (unsigned long)__phys_to_iee(start), (unsigned long)__phys_to_iee(end)); } } +#endif + +#ifdef CONFIG_KOI +extern s64 koi_offset; +#endif void __init paging_init(void) { + #ifdef CONFIG_IEE + unsigned long SP_EL0; + void *new; + void *init_token; + struct task_token *token; + unsigned long tcr; + + // Check if cpu has PAN and HPDS. + if(!cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64MMFR1_EL1), + ID_AA64MMFR1_EL1_PAN_SHIFT)) + panic("Architecture doesn't support PAN, please disable CONFIG_IEE.\n"); + + if(!cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64MMFR1_EL1), + ID_AA64MMFR1_EL1_HPDS_SHIFT)) + panic("Architecture doesn't support HPDS, please disable CONFIG_IEE.\n"); + #endif + + // Avoid using iee code to modify pgtable before iee initialized. 
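The two panic checks above use cpuid_feature_extract_unsigned_field() on ID_AA64MMFR1_EL1, whose contents are packed as 4-bit feature fields (PAN at bits 23:20, HPDS at bits 15:12). The extraction is a shift and mask; a standalone demo with a fabricated register value:

/* ID register fields are 4 bits wide; a nonzero field means the
 * feature is implemented. Shift values follow the Arm ARM. */
#include <stdint.h>
#include <stdio.h>

#define ID_AA64MMFR1_EL1_HPDS_SHIFT 12
#define ID_AA64MMFR1_EL1_PAN_SHIFT  20

static unsigned int feature_field(uint64_t reg, unsigned int shift)
{
    return (reg >> shift) & 0xf;
}

int main(void)
{
    uint64_t mmfr1 = (2ULL << ID_AA64MMFR1_EL1_PAN_SHIFT) |
                     (1ULL << ID_AA64MMFR1_EL1_HPDS_SHIFT); /* fabricated */

    printf("PAN  = %u\n", feature_field(mmfr1, ID_AA64MMFR1_EL1_PAN_SHIFT));
    printf("HPDS = %u\n", feature_field(mmfr1, ID_AA64MMFR1_EL1_HPDS_SHIFT));
    return 0;
}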
+ #ifdef CONFIG_PTP + pgd_t *pgdp = pgd_set_fixmap_init(__pa_symbol(swapper_pg_dir)); + #else pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir)); + #endif + + extern pgd_t init_idmap_pg_dir[]; idmap_t0sz = 63UL - __fls(__pa_symbol(_end) | GENMASK(VA_BITS_MIN - 1, 0)); @@ -802,7 +2006,17 @@ void __init paging_init(void) map_kernel(pgdp); map_mem(pgdp); + // Map the whole of physical memory into IEE, but leave it invalid. + #ifdef CONFIG_IEE + map_iee(pgdp); + #endif + + // Avoid using IEE code to modify the pgtable before IEE is initialized. + #ifdef CONFIG_PTP + pgd_clear_fixmap_init(); + #else pgd_clear_fixmap(); + #endif cpu_replace_ttbr1(lm_alias(swapper_pg_dir), init_idmap_pg_dir); init_mm.pgd = swapper_pg_dir; @@ -813,6 +2027,80 @@ void __init paging_init(void) memblock_allow_resize(); create_idmap(); + + #ifdef CONFIG_IEE + // Test iee_exec_entry. + iee_rwx_gate_entry(IEE_SI_TEST); + // Initialize the init IEE stack. + #ifdef CONFIG_PTP + iee_set_kernel_upage_pre_init((unsigned long)init_iee_stack_begin); + iee_set_kernel_upage_pre_init((unsigned long)__va(__pa_symbol(init_iee_stack_begin))); + #else + iee_set_kernel_upage((unsigned long)init_iee_stack_begin); + iee_set_kernel_upage((unsigned long)__va(__pa_symbol(init_iee_stack_begin))); + #endif + #endif + + // Initialize the token for init_task. + #ifdef CONFIG_IEE + // Change SP_EL0 from an Image VA to a logical VA. + SP_EL0 = (unsigned long)__va(__pa_symbol(&init_task)); + write_sysreg(SP_EL0, sp_el0); + init_task.cpus_ptr = &(((struct task_struct *)(__va(__pa_symbol(&init_task))))->cpus_mask); + init_task.children.prev = (__va(__pa_symbol(init_task.children.prev))); + init_task.children.next = (__va(__pa_symbol(init_task.children.next))); + // Set init_task into __entry_task before per_cpu init. + *(struct task_struct **)__entry_task = __va(__pa_symbol(&init_task)); + // Allocate a page for init_token. + new = __va(early_pgtable_alloc(0)); + init_token = (void *)__phys_to_iee(__pa_symbol(&init_task)); + #ifdef CONFIG_PTP + iee_set_token_page_valid_pre_init(init_token, new); + #else + iee_set_token_page_valid(init_token, new); + #endif + // Use the linear map to write the token before IEE is initialized. + token = (struct task_token *)((unsigned long)new + (((unsigned long)&init_task) & ~PAGE_MASK)); + token->mm = &init_mm; + token->pgd = NULL; + token->iee_stack = (void *)init_iee_stack_end; + token->valid = true; + #endif + + #ifdef CONFIG_PTP + // Map the existing pgtable into IEE and set it valid. + init_iee(); + #endif + + #ifdef CONFIG_IEE + sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_SPAN); + #endif + + #ifdef CONFIG_PTP + // IEE is ready: page-table writes before this point go through the + // logical map, and from here on through the IEE mapping. + + // Set the logical VA of the existing pgtable read-only. + iee_mark_all_lm_pgtable_ro(); + #endif + + // Set the init token read-only. + #ifdef CONFIG_IEE + set_iee_page_valid(__phys_to_iee(__pa(new))); + iee_set_logical_mem_ro((unsigned long)new); + + // Set HPD1 to 1. + tcr = read_sysreg(tcr_el1); + tcr |= ((unsigned long)0x1 << 42); + write_sysreg(tcr, tcr_el1); + isb(); + + // Flush the TLB to enable IEE. + flush_tlb_all(); + + // Mark that IEE is ready.
+ iee_init_done = true; + #endif } #ifdef CONFIG_MEMORY_HOTPLUG diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 4a64089e5771..894bda11c389 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -15,14 +15,44 @@ #include <asm/page.h> #include <asm/tlbflush.h> +#ifdef CONFIG_PTP +#include <linux/iee-func.h> +#endif + static struct kmem_cache *pgd_cache __ro_after_init; +#ifdef CONFIG_KOI +pgd_t *koi_pgd_alloc(void) +{ + pgd_t *pgd; +#ifdef CONFIG_PTP + pgd = (pgd_t *)__get_free_page(GFP_PGTABLE_KERNEL); + unsigned long iee_addr = __phys_to_iee(__pa(pgd)); + set_iee_page_valid(iee_addr); + iee_set_logical_mem_ro((unsigned long)pgd); +#else + pgd = (pgd_t *)__get_free_page(GFP_PGTABLE_KERNEL); +#endif + return pgd; +} +#endif + pgd_t *pgd_alloc(struct mm_struct *mm) { gfp_t gfp = GFP_PGTABLE_USER; if (PGD_SIZE == PAGE_SIZE) +#ifdef CONFIG_PTP + { + pgd_t* new = (pgd_t *)__get_free_page(gfp); + unsigned long iee_addr = __phys_to_iee(__pa(new)); + set_iee_page_valid(iee_addr); + iee_set_logical_mem_ro((unsigned long)new); + return new; + } +#else return (pgd_t *)__get_free_page(gfp); +#endif else return kmem_cache_alloc(pgd_cache, gfp); } @@ -30,7 +60,16 @@ pgd_t *pgd_alloc(struct mm_struct *mm) void pgd_free(struct mm_struct *mm, pgd_t *pgd) { if (PGD_SIZE == PAGE_SIZE) +#ifdef CONFIG_PTP + { + unsigned long iee_addr = __phys_to_iee(__pa(pgd)); + set_iee_page_invalid(iee_addr); + iee_set_logical_mem_rw((unsigned long)pgd); + free_page((unsigned long)pgd); + } +#else free_page((unsigned long)pgd); +#endif else kmem_cache_free(pgd_cache, pgd); } diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 14fdf645edc8..230b2b883a51 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -22,6 +22,8 @@ #include <asm/smp.h> #include <asm/sysreg.h> + + #ifdef CONFIG_ARM64_64K_PAGES #define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K #elif defined(CONFIG_ARM64_16K_PAGES) @@ -105,6 +107,19 @@ SYM_FUNC_START(cpu_do_suspend) ret SYM_FUNC_END(cpu_do_suspend) + +#ifdef CONFIG_IEE +// SP_EL0 check failed. +SYM_FUNC_START_LOCAL(sp_el0_check_failed) +1: + nop + nop + nop + nop + b 1f +SYM_FUNC_END(sp_el0_check_failed) +#endif + /** * cpu_do_resume - restore CPU register context * @@ -148,6 +163,13 @@ SYM_FUNC_START(cpu_do_resume) msr sctlr_el1, x12 set_this_cpu_offset x13 msr sp_el0, x14 +#ifdef CONFIG_IEE + // tsk check. 
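The "tsk check" annotated here guards cpu_do_resume(): on arm64 the kernel keeps the current task pointer in SP_EL0, so after restoring registers the patch compares it against the per-CPU __entry_task copy and diverts to sp_el0_check_failed on mismatch. A user-space model of the comparison, with stand-in pointer values and abort() standing in for the parking loop:

/* Model of the resume-time SP_EL0 integrity check. */
#include <stdio.h>
#include <stdlib.h>

static void sp_el0_check_failed(void) { abort(); /* stand-in for the parking loop */ }

int main(void)
{
    unsigned long entry_task = 0xffff000012345000UL; /* per-CPU __entry_task copy */
    unsigned long sp_el0     = 0xffff000012345000UL; /* restored register value */

    if (sp_el0 != entry_task)
        sp_el0_check_failed();
    puts("sp_el0 matches __entry_task");
    return 0;
}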
+ ldr_this_cpu x2, __entry_task, x3 + mrs x3, sp_el0 + cmp x2, x3 + b.ne sp_el0_check_failed +#endif /* * Restore oslsr_el1 by writing oslar_el1 */ @@ -190,6 +212,7 @@ SYM_TYPED_FUNC_START(idmap_cpu_replace_ttbr1) __idmap_cpu_set_reserved_ttbr1 x1, x3 offset_ttbr1 x0, x3 + msr ttbr1_el1, x0 isb @@ -452,6 +475,11 @@ SYM_FUNC_START(__cpu_setup) orr tcr, tcr, #TCR_HA // hardware Access flag update 1: #endif /* CONFIG_ARM64_HW_AFDBM */ + +#ifdef CONFIG_IEE + orr tcr, tcr, #TCR_HPD1 // Hierarchical permission disables +#endif + msr mair_el1, mair msr tcr_el1, tcr diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c index 5139a28130c0..15d2a3faa048 100644 --- a/arch/arm64/mm/trans_pgd.c +++ b/arch/arm64/mm/trans_pgd.c @@ -25,6 +25,9 @@ #include <linux/mm.h> #include <linux/mmzone.h> #include <linux/kfence.h> +#ifdef CONFIG_PTP +#include <linux/iee-func.h> +#endif static void *trans_alloc(struct trans_pgd_info *info) { @@ -65,10 +68,18 @@ static int copy_pte(struct trans_pgd_info *info, pmd_t *dst_pmdp, pte_t *src_ptep; pte_t *dst_ptep; unsigned long addr = start; + #ifdef CONFIG_PTP + unsigned long iee_addr; + #endif dst_ptep = trans_alloc(info); if (!dst_ptep) return -ENOMEM; + #ifdef CONFIG_PTP + iee_addr = __phys_to_iee(__pa(dst_ptep)); + set_iee_page_valid(iee_addr); + iee_set_logical_mem_ro((unsigned long)dst_ptep); + #endif pmd_populate_kernel(NULL, dst_pmdp, dst_ptep); dst_ptep = pte_offset_kernel(dst_pmdp, start); @@ -87,11 +98,19 @@ static int copy_pmd(struct trans_pgd_info *info, pud_t *dst_pudp, pmd_t *dst_pmdp; unsigned long next; unsigned long addr = start; + #ifdef CONFIG_PTP + unsigned long iee_addr; + #endif if (pud_none(READ_ONCE(*dst_pudp))) { dst_pmdp = trans_alloc(info); if (!dst_pmdp) return -ENOMEM; + #ifdef CONFIG_PTP + iee_addr = __phys_to_iee(__pa(dst_pmdp)); + set_iee_page_valid(iee_addr); + iee_set_logical_mem_ro((unsigned long)dst_pmdp); + #endif pud_populate(NULL, dst_pudp, dst_pmdp); } dst_pmdp = pmd_offset(dst_pudp, start); @@ -123,11 +142,19 @@ static int copy_pud(struct trans_pgd_info *info, p4d_t *dst_p4dp, pud_t *src_pudp; unsigned long next; unsigned long addr = start; + #ifdef CONFIG_PTP + unsigned long iee_addr; + #endif if (p4d_none(READ_ONCE(*dst_p4dp))) { dst_pudp = trans_alloc(info); if (!dst_pudp) return -ENOMEM; + #ifdef CONFIG_PTP + iee_addr = __phys_to_iee(__pa(dst_pudp)); + set_iee_page_valid(iee_addr); + iee_set_logical_mem_ro((unsigned long)dst_pudp); + #endif p4d_populate(NULL, dst_p4dp, dst_pudp); } dst_pudp = pud_offset(dst_p4dp, start); @@ -212,6 +239,12 @@ int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **dst_pgdp, return -ENOMEM; } + #ifdef CONFIG_PTP + unsigned long iee_addr = __phys_to_iee(__pa(trans_pgd)); + set_iee_page_valid(iee_addr); + iee_set_logical_mem_ro((unsigned long)trans_pgd); + #endif + rc = copy_page_tables(info, trans_pgd, start, end); if (!rc) *dst_pgdp = trans_pgd; @@ -238,6 +271,9 @@ int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0, int bits_mapped = PAGE_SHIFT - 4; unsigned long level_mask, prev_level_entry, *levels[4]; int this_level, index, level_lsb, level_msb; + #ifdef CONFIG_PTP + unsigned long iee_addr; + #endif dst_addr &= PAGE_MASK; prev_level_entry = pte_val(pfn_pte(pfn, PAGE_KERNEL_ROX)); @@ -247,12 +283,22 @@ int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0, if (!levels[this_level]) return -ENOMEM; + #ifdef CONFIG_PTP + iee_addr = __phys_to_iee(__pa(levels[this_level])); + set_iee_page_valid(iee_addr); + 
iee_set_logical_mem_ro((unsigned long)levels[this_level]); + #endif + level_lsb = ARM64_HW_PGTABLE_LEVEL_SHIFT(this_level); level_msb = min(level_lsb + bits_mapped, max_msb); level_mask = GENMASK_ULL(level_msb, level_lsb); index = (dst_addr & level_mask) >> level_lsb; + #ifdef CONFIG_PTP + set_pte((pte_t *)(levels[this_level] + index), __pte(prev_level_entry)); + #else *(levels[this_level] + index) = prev_level_entry; + #endif pfn = virt_to_pfn(levels[this_level]); prev_level_entry = pte_val(pfn_pte(pfn, diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 83092d93f36a..fb12e7d0660a 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -94,7 +94,11 @@ static int __init arm_enable_runtime_services(void) return 0; } + #ifdef CONFIG_PTP + efi_memmap_unmap_after_init(); + #else efi_memmap_unmap(); + #endif mapsize = efi.memmap.desc_size * efi.memmap.nr_map; diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c index a1180461a445..4c64b6f15717 100644 --- a/drivers/firmware/efi/memmap.c +++ b/drivers/firmware/efi/memmap.c @@ -105,6 +105,26 @@ void __init efi_memmap_unmap(void) clear_bit(EFI_MEMMAP, &efi.flags); } +#ifdef CONFIG_PTP +void __init efi_memmap_unmap_after_init(void) +{ + if (!efi_enabled(EFI_MEMMAP)) + return; + + if (!(efi.memmap.flags & EFI_MEMMAP_LATE)) { + unsigned long size; + + size = efi.memmap.desc_size * efi.memmap.nr_map; + early_iounmap_after_init((__force void __iomem *)efi.memmap.map, size); + } else { + memunmap(efi.memmap.map); + } + + efi.memmap.map = NULL; + clear_bit(EFI_MEMMAP, &efi.flags); +} +#endif + /** * efi_memmap_init_late - Map efi.memmap with memremap() * @phys_addr: Physical address of the new EFI memory map diff --git a/drivers/tty/serial/earlycon.c b/drivers/tty/serial/earlycon.c index a5fbb6ed38ae..81428783b9da 100644 --- a/drivers/tty/serial/earlycon.c +++ b/drivers/tty/serial/earlycon.c @@ -40,7 +40,11 @@ static void __iomem * __init earlycon_map(resource_size_t paddr, size_t size) { void __iomem *base; #ifdef CONFIG_FIX_EARLYCON_MEM + #ifdef CONFIG_PTP + __iee_set_fixmap_pre_init(FIX_EARLYCON_MEM_BASE, paddr & PAGE_MASK, FIXMAP_PAGE_IO); + #else set_fixmap_io(FIX_EARLYCON_MEM_BASE, paddr & PAGE_MASK); + #endif base = (void __iomem *)__fix_to_virt(FIX_EARLYCON_MEM_BASE); base += paddr & ~PAGE_MASK; #else diff --git a/drivers/usb/early/ehci-dbgp.c b/drivers/usb/early/ehci-dbgp.c index 45b42d8f6453..b71072d6957e 100644 --- a/drivers/usb/early/ehci-dbgp.c +++ b/drivers/usb/early/ehci-dbgp.c @@ -879,7 +879,11 @@ int __init early_dbgp_init(char *s) * FIXME I don't have the bar size so just guess PAGE_SIZE is more * than enough. 1K is the biggest I have seen. 
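The earlycon and EHCI debug-port hunks above share one fixmap idiom, which the __iee_set_fixmap_pre_init() variants preserve: map the page containing the MMIO base through a fixed virtual slot, then add the physical address's in-page offset back onto the slot's VA. The arithmetic, with a made-up fixmap address and register base:

/* Model of the fixmap offset arithmetic used by both early consoles. */
#include <stdint.h>
#include <stdio.h>

#define PAGE_MASK   (~4095UL)
#define FIXMAP_VIRT 0xffff7dfffe800000UL  /* stand-in fixmap slot VA */

int main(void)
{
    uintptr_t paddr = 0x3f215040UL;  /* made-up device register base */
    uintptr_t base  = FIXMAP_VIRT;   /* maps paddr & PAGE_MASK */

    base += paddr & ~PAGE_MASK;      /* restore the in-page offset */
    printf("mapped register VA: %#lx\n", (unsigned long)base);
    return 0;
}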
*/ + #ifdef CONFIG_PTP + __iee_set_fixmap_pre_init(FIX_DBGP_BASE, bar_val & PAGE_MASK, FIXMAP_PAGE_NOCACHE); + #else set_fixmap_nocache(FIX_DBGP_BASE, bar_val & PAGE_MASK); + #endif ehci_bar = (void __iomem *)__fix_to_virt(FIX_DBGP_BASE); ehci_bar += bar_val & ~PAGE_MASK; dbgp_printk("ehci_bar: %p\n", ehci_bar); diff --git a/fs/coredump.c b/fs/coredump.c index 9d235fa14ab9..72be355903ca 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -53,6 +53,10 @@ #include <trace/events/sched.h> +#ifdef CONFIG_CREDP +#include <asm/iee-cred.h> +#endif + static bool dump_vma_snapshot(struct coredump_params *cprm); static void free_vma_snapshot(struct coredump_params *cprm); @@ -564,7 +568,11 @@ void do_coredump(const kernel_siginfo_t *siginfo) */ if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) { /* Setuid core dump mode */ + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(cred,GLOBAL_ROOT_UID); + #else cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ + #endif need_suid_safe = true; } diff --git a/fs/exec.c b/fs/exec.c index 04fb89656cc3..3689c5f008ba 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -76,6 +76,14 @@ #include <trace/events/sched.h> +#ifdef CONFIG_CREDP +#include <asm/iee-cred.h> +#endif + +#ifdef CONFIG_IEE +#include <asm/iee-token.h> +#endif + static int bprm_creds_from_file(struct linux_binprm *bprm); int suid_dumpable = 0; @@ -1005,6 +1013,10 @@ static int exec_mmap(struct mm_struct *mm) if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) local_irq_enable(); activate_mm(active_mm, mm); + #ifdef CONFIG_IEE + iee_set_token_mm(tsk, mm); + iee_set_token_pgd(tsk, mm->pgd); + #endif if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) local_irq_enable(); lru_gen_add_mm(mm); @@ -1618,12 +1630,20 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file) if (mode & S_ISUID) { bprm->per_clear |= PER_CLEAR_ON_SETID; + #ifdef CONFIG_CREDP + iee_set_cred_euid(bprm->cred, vfsuid_into_kuid(vfsuid)); + #else bprm->cred->euid = vfsuid_into_kuid(vfsuid); + #endif } if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { bprm->per_clear |= PER_CLEAR_ON_SETID; + #ifdef CONFIG_CREDP + iee_set_cred_egid(bprm->cred, vfsgid_into_kgid(vfsgid)); + #else bprm->cred->egid = vfsgid_into_kgid(vfsgid); + #endif } } diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 3e724cb7ef01..e32e136e4271 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -15,6 +15,10 @@ #include <linux/sunrpc/metrics.h> +#ifdef CONFIG_CREDP +#include <asm/iee-cred.h> +#endif + #include "flexfilelayout.h" #include "../nfs4session.h" #include "../nfs4idmap.h" @@ -502,8 +506,13 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, rc = -ENOMEM; if (!kcred) goto out_err_free; + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(kcred,uid); + iee_set_cred_fsgid(kcred,gid); + #else kcred->fsuid = uid; kcred->fsgid = gid; + #endif cred = RCU_INITIALIZER(kcred); if (lgr->range.iomode == IOMODE_READ) diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 25a7c771cfd8..b15ab8e33e0e 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -48,6 +48,10 @@ #include <linux/module.h> #include <linux/user_namespace.h> +#ifdef CONFIG_CREDP +#include <asm/iee-cred.h> +#endif + #include "internal.h" #include "netns.h" #include "nfs4idmap.h" @@ -226,8 +230,13 @@ int nfs_idmap_init(void) goto failed_reg_legacy; set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); + #ifdef CONFIG_CREDP + iee_set_cred_thread_keyring(cred,keyring); + 
iee_set_cred_jit_keyring(cred,KEY_REQKEY_DEFL_THREAD_KEYRING); + #else cred->thread_keyring = keyring; cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; + #endif id_resolver_cache = cred; return 0; diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index e6beaaf4f170..e89385fd81f1 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -2,6 +2,9 @@ /* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ #include <linux/sched.h> +#ifdef CONFIG_CREDP +#include <asm/iee-cred.h> +#endif #include "nfsd.h" #include "auth.h" @@ -32,22 +35,40 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) if (!new) return -ENOMEM; + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new,rqstp->rq_cred.cr_uid); + iee_set_cred_fsgid(new,rqstp->rq_cred.cr_gid); + #else new->fsuid = rqstp->rq_cred.cr_uid; new->fsgid = rqstp->rq_cred.cr_gid; + #endif rqgi = rqstp->rq_cred.cr_group_info; if (flags & NFSEXP_ALLSQUASH) { + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new,exp->ex_anon_uid); + iee_set_cred_fsgid(new,exp->ex_anon_gid); + #else new->fsuid = exp->ex_anon_uid; new->fsgid = exp->ex_anon_gid; + #endif gi = groups_alloc(0); if (!gi) goto oom; } else if (flags & NFSEXP_ROOTSQUASH) { if (uid_eq(new->fsuid, GLOBAL_ROOT_UID)) + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new,exp->ex_anon_uid); + #else new->fsuid = exp->ex_anon_uid; + #endif if (gid_eq(new->fsgid, GLOBAL_ROOT_GID)) + #ifdef CONFIG_CREDP + iee_set_cred_fsgid(new,exp->ex_anon_gid); + #else new->fsgid = exp->ex_anon_gid; + #endif gi = groups_alloc(rqgi->ngroups); if (!gi) @@ -67,18 +88,35 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) } if (uid_eq(new->fsuid, INVALID_UID)) + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new,exp->ex_anon_uid); + #else new->fsuid = exp->ex_anon_uid; + #endif if (gid_eq(new->fsgid, INVALID_GID)) + #ifdef CONFIG_CREDP + iee_set_cred_fsgid(new,exp->ex_anon_gid); + #else new->fsgid = exp->ex_anon_gid; + #endif set_groups(new, gi); put_group_info(gi); if (!uid_eq(new->fsuid, GLOBAL_ROOT_UID)) + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new,cap_drop_nfsd_set(new->cap_effective)); + #else new->cap_effective = cap_drop_nfsd_set(new->cap_effective); + #endif else + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new,cap_raise_nfsd_set(new->cap_effective, + new->cap_permitted)); + #else new->cap_effective = cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); + #endif put_cred(override_creds(new)); put_cred(new); return 0; diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 4039ffcf90ba..6e0dfa01e01e 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -41,6 +41,9 @@ #include "trace.h" #include "xdr4cb.h" #include "xdr4.h" +#ifdef CONFIG_CREDP +#include <asm/iee-cred.h> +#endif #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -946,8 +949,13 @@ static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct r if (!kcred) return NULL; - kcred->fsuid = ses->se_cb_sec.uid; - kcred->fsgid = ses->se_cb_sec.gid; + #ifdef CONFIG_CREDP + iee_set_cred_uid(kcred,ses->se_cb_sec.uid); + iee_set_cred_gid(kcred,ses->se_cb_sec.gid); + #else + kcred->uid = ses->se_cb_sec.uid; + kcred->gid = ses->se_cb_sec.gid; + #endif return kcred; } } diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 3509e73abe1f..798fe0352841 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -44,6 +44,10 @@ #include <linux/sunrpc/clnt.h> #include <linux/nfsd/cld.h> +#ifdef CONFIG_CREDP +#include <asm/iee-cred.h> +#endif + #include "nfsd.h" #include "state.h" #include "vfs.h" @@ 
-78,8 +82,13 @@ nfs4_save_creds(const struct cred **original_creds) if (!new) return -ENOMEM; + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new,GLOBAL_ROOT_UID); + iee_set_cred_fsgid(new,GLOBAL_ROOT_GID); + #else new->fsuid = GLOBAL_ROOT_UID; new->fsgid = GLOBAL_ROOT_GID; + #endif *original_creds = override_creds(new); put_cred(new); return 0; diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 355bf0db3235..6cb276dde4e9 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -16,6 +16,10 @@ #include "auth.h" #include "trace.h" +#ifdef CONFIG_CREDP +#include <asm/iee-cred.h> +#endif + #define NFSDDBG_FACILITY NFSDDBG_FH @@ -223,9 +227,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) error = nfserrno(-ENOMEM); goto out; } + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new,cap_raise_nfsd_set(new->cap_effective, + new->cap_permitted)); + #else new->cap_effective = cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); + #endif put_cred(override_creds(new)); put_cred(new); } else { diff --git a/fs/open.c b/fs/open.c index 54723fceb776..d83901dc50ff 100644 --- a/fs/open.c +++ b/fs/open.c @@ -35,6 +35,11 @@ #include <linux/mnt_idmapping.h> #include <linux/filelock.h> +#ifdef CONFIG_CREDP +#include <asm/iee-cred.h> +#endif + + #include "internal.h" int do_truncate(struct mnt_idmap *idmap, struct dentry *dentry, @@ -414,17 +419,34 @@ static const struct cred *access_override_creds(void) * routine. */ + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(override_cred,override_cred->uid); + iee_set_cred_fsgid(override_cred,override_cred->gid); + #else override_cred->fsuid = override_cred->uid; override_cred->fsgid = override_cred->gid; + #endif if (!issecure(SECURE_NO_SETUID_FIXUP)) { /* Clear the capabilities if we switch to a non-root user */ kuid_t root_uid = make_kuid(override_cred->user_ns, 0); if (!uid_eq(override_cred->uid, root_uid)) + #ifdef CONFIG_CREDP + do { + kernel_cap_t tmp_cap = override_cred->cap_effective; + tmp_cap.val = 0; + iee_set_cred_cap_effective(override_cred, tmp_cap); + } while (0); + #else cap_clear(override_cred->cap_effective); + #endif else + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(override_cred,override_cred->cap_permitted); + #else override_cred->cap_effective = override_cred->cap_permitted; + #endif } /* @@ -444,7 +466,11 @@ static const struct cred *access_override_creds(void) * expecting RCU freeing. But normal thread-synchronous * cred accesses will keep things non-RCY. */ + #ifdef CONFIG_CREDP + iee_set_cred_non_rcu(override_cred,1); + #else override_cred->non_rcu = 1; + #endif old_cred = override_creds(override_cred); diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 033fc0458a3d..2afa31ead2b5 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -16,6 +16,10 @@ #include <linux/ratelimit.h> #include "overlayfs.h" +#ifdef CONFIG_CREDP +#include <asm/iee-cred.h> +#endif + static unsigned short ovl_redirect_max = 256; module_param_named(redirect_max, ovl_redirect_max, ushort, 0644); MODULE_PARM_DESC(redirect_max, @@ -593,8 +597,13 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, * create a new inode, so just use the ovl mounter's * fs{u,g}id. 
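Every CONFIG_CREDP hunk in these filesystem files rewrites a direct store to a struct cred field as a call to an iee_set_cred_*() accessor, which funnels the write through iee_rw_gate(); read-modify-write updates (the capability-mask cases, such as the cap_lower rewrite below) compute the new value in a local variable and publish it through the setter. A minimal user-space model of the pattern, with invented types rather than the kernel's cred layout or IEE API:

/* Model of the CREDP write-gate pattern: fields are read freely but
 * written only through a setter. */
#include <stdio.h>

struct cred_model { unsigned int fsuid, fsgid; };

/* stand-in for the iee_rw_gate() dispatch */
static void iee_set_fsuid_model(struct cred_model *c, unsigned int v)
{
    printf("gate: fsuid %u -> %u\n", c->fsuid, v);
    c->fsuid = v;   /* in the kernel this store is made from inside IEE */
}

int main(void)
{
    struct cred_model c = { .fsuid = 1000, .fsgid = 1000 };
    unsigned int tmp = c.fsuid;     /* read */
    tmp = 0;                        /* modify locally */
    iee_set_fsuid_model(&c, tmp);   /* publish through the gate */
    return 0;
}

Keeping all writes behind one gate lets the IEE side validate every credential update, at the cost of touching each write site in the tree, which is why these hunks fan out across so many filesystems.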
*/ + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(override_cred,inode->i_uid); + iee_set_cred_fsgid(override_cred,inode->i_gid); + #else override_cred->fsuid = inode->i_uid; override_cred->fsgid = inode->i_gid; + #endif err = security_dentry_create_files_as(dentry, attr->mode, &dentry->d_name, old_cred, override_cred); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 2c056d737c27..9ede99ddb04b 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -21,6 +21,10 @@ #include "overlayfs.h" #include "params.h" +#ifdef CONFIG_CREDP +#include <asm/iee-cred.h> +#endif + MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); MODULE_DESCRIPTION("Overlay filesystem"); MODULE_LICENSE("GPL"); @@ -1485,7 +1489,15 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_export_op = &ovl_export_fid_operations; /* Never override disk quota limits or use reserved space */ + #ifdef CONFIG_CREDP + { + kernel_cap_t tmp = cred->cap_effective; + cap_lower(tmp, CAP_SYS_RESOURCE); + iee_set_cred_cap_effective(cred, tmp); + } + #else cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); + #endif sb->s_magic = OVERLAYFS_SUPER_MAGIC; sb->s_xattr = ofs->config.userxattr ? ovl_user_xattr_handlers : diff --git a/fs/smb/client/cifs_spnego.c b/fs/smb/client/cifs_spnego.c index af7849e5974f..4ac2f0e65955 100644 --- a/fs/smb/client/cifs_spnego.c +++ b/fs/smb/client/cifs_spnego.c @@ -18,6 +18,10 @@ #include "cifs_spnego.h" #include "cifs_debug.h" #include "cifsproto.h" +#ifdef CONFIG_CREDP +#include <asm/iee-cred.h> +#endif + static const struct cred *spnego_cred; /* create a new cifs key */ @@ -212,8 +216,13 @@ init_cifs_spnego(void) * the results it looks up */ set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); + #ifdef CONFIG_CREDP + iee_set_cred_thread_keyring(cred,keyring); + iee_set_cred_jit_keyring(cred,KEY_REQKEY_DEFL_THREAD_KEYRING); + #else cred->thread_keyring = keyring; cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; + #endif spnego_cred = cred; cifs_dbg(FYI, "cifs spnego keyring: %d\n", key_serial(keyring)); diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index f5b6df82e857..9f0ff045836d 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -26,6 +26,10 @@ #include "cifs_fs_sb.h" #include "cifs_unicode.h" +#ifdef CONFIG_CREDP +#include <asm/iee-cred.h> +#endif + /* security id for everyone/world system group */ static const struct cifs_sid sid_everyone = { 1, 1, {0, 0, 0, 0, 0, 1}, {0} }; @@ -491,8 +495,13 @@ init_cifs_idmap(void) /* instruct request_key() to use this special keyring as a cache for * the results it looks up */ set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); + #ifdef CONFIG_CREDP + iee_set_cred_thread_keyring(cred,keyring); + iee_set_cred_jit_keyring(cred,KEY_REQKEY_DEFL_THREAD_KEYRING); + #else cred->thread_keyring = keyring; cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; + #endif root_cred = cred; cifs_dbg(FYI, "cifs idmap keyring: %d\n", key_serial(keyring)); diff --git a/include/asm-generic/early_ioremap.h b/include/asm-generic/early_ioremap.h index 9d0479f50f97..f501e0f965f8 100644 --- a/include/asm-generic/early_ioremap.h +++ b/include/asm-generic/early_ioremap.h @@ -17,6 +17,9 @@ extern void *early_memremap_ro(resource_size_t phys_addr, extern void *early_memremap_prot(resource_size_t phys_addr, unsigned long size, unsigned long prot_val); extern void early_iounmap(void __iomem *addr, unsigned long size); +#ifdef CONFIG_PTP +extern void early_iounmap_after_init(void __iomem *addr, unsigned long size); +#endif extern 
void early_memunmap(void *addr, unsigned long size); #if defined(CONFIG_GENERIC_EARLY_IOREMAP) && defined(CONFIG_MMU) diff --git a/include/asm-generic/fixmap.h b/include/asm-generic/fixmap.h index 8cc7b09c1bc7..83158589a545 100644 --- a/include/asm-generic/fixmap.h +++ b/include/asm-generic/fixmap.h @@ -70,6 +70,24 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr) __set_fixmap(idx, 0, FIXMAP_PAGE_CLEAR) #endif +#ifdef CONFIG_PTP +#ifndef clear_fixmap_init +#define clear_fixmap_init(idx) \ + __iee_set_fixmap_pre_init(idx, 0, FIXMAP_PAGE_CLEAR) +#endif + +#define __iee_set_fixmap_offset_pre_init(idx, phys, flags) \ +({ \ + unsigned long ________addr; \ + __iee_set_fixmap_pre_init(idx, phys, flags); \ + ________addr = fix_to_virt(idx) + ((phys) & (PAGE_SIZE - 1)); \ + ________addr; \ +}) + +#define iee_set_fixmap_offset_pre_init(idx, phys) \ + __iee_set_fixmap_offset_pre_init(idx, phys, FIXMAP_PAGE_NORMAL) +#endif + /* Return a pointer with offset calculated */ #define __set_fixmap_offset(idx, phys, flags) \ ({ \ diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h index c75d4a753849..506ff9662e02 100644 --- a/include/asm-generic/pgalloc.h +++ b/include/asm-generic/pgalloc.h @@ -7,6 +7,10 @@ #define GFP_PGTABLE_KERNEL (GFP_KERNEL | __GFP_ZERO) #define GFP_PGTABLE_USER (GFP_PGTABLE_KERNEL | __GFP_ACCOUNT) +#ifdef CONFIG_PTP +#include <linux/iee-func.h> +#endif + /** * __pte_alloc_one_kernel - allocate memory for a PTE-level kernel page table * @mm: the mm_struct of the current context @@ -23,6 +27,13 @@ static inline pte_t *__pte_alloc_one_kernel(struct mm_struct *mm) if (!ptdesc) return NULL; + + #ifdef CONFIG_PTP + unsigned long iee_addr = __phys_to_iee(__pa(ptdesc_address(ptdesc))); + set_iee_page_valid(iee_addr); + iee_set_logical_mem_ro((unsigned long)ptdesc_address(ptdesc)); + #endif + return ptdesc_address(ptdesc); } @@ -46,6 +57,11 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) */ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { + #ifdef CONFIG_PTP + unsigned long iee_addr = __phys_to_iee(__pa(pte)); + set_iee_page_invalid(iee_addr); + iee_set_logical_mem_rw((unsigned long)pte); + #endif pagetable_free(virt_to_ptdesc(pte)); } @@ -73,6 +89,13 @@ static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp) return NULL; } + #ifdef CONFIG_PTP + pte_t *pte = (pte_t *)page_address(ptdesc_page(ptdesc)); + unsigned long iee_addr = __phys_to_iee(__pa(pte)); + set_iee_page_valid(iee_addr); + iee_set_logical_mem_ro((unsigned long)pte); + #endif + return ptdesc_page(ptdesc); } @@ -103,9 +126,20 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm) */ static inline void pte_free(struct mm_struct *mm, struct page *pte_page) { + #ifdef CONFIG_PTP + unsigned long iee_addr; + #endif + struct ptdesc *ptdesc = page_ptdesc(pte_page); pagetable_pte_dtor(ptdesc); + + #ifdef CONFIG_PTP + iee_addr = __phys_to_iee(__pa(page_address(pte_page))); + set_iee_page_invalid(iee_addr); + iee_set_logical_mem_rw((unsigned long)page_address(pte_page)); + #endif + pagetable_free(ptdesc); } @@ -145,10 +179,21 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) #ifndef __HAVE_ARCH_PMD_FREE static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { + #ifdef CONFIG_PTP + unsigned long iee_addr; + #endif + struct ptdesc *ptdesc = virt_to_ptdesc(pmd); BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); pagetable_pmd_dtor(ptdesc); + + #ifdef CONFIG_PTP + iee_addr = __phys_to_iee(__pa(pmd)); + 
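/*
 * Annotation: the pgalloc hunks above always pair the same two primitives.
 * Right after a page-table page is allocated, the patch marks its IEE
 * shadow address valid and flips the linear-map alias read-only
 * (set_iee_page_valid() + iee_set_logical_mem_ro()); right before the page
 * is freed it applies the inverse pair. A user-space sketch of that
 * invariant, with mmap()/mprotect() standing in for the IEE primitives:
 */
#include <assert.h>
#include <sys/mman.h>
#include <unistd.h>

static void *pt_alloc(size_t pagesz)
{
        void *p = mmap(NULL, pagesz, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        assert(p != MAP_FAILED);
        /* models set_iee_page_valid() + iee_set_logical_mem_ro() */
        assert(mprotect(p, pagesz, PROT_READ) == 0);
        return p;
}

static void pt_free(void *p, size_t pagesz)
{
        /* models set_iee_page_invalid() + iee_set_logical_mem_rw() */
        assert(mprotect(p, pagesz, PROT_READ | PROT_WRITE) == 0);
        munmap(p, pagesz);
}

int main(void)
{
        size_t pagesz = (size_t)sysconf(_SC_PAGESIZE);
        void *pte = pt_alloc(pagesz);

        /* a direct store to *pte would now fault; updates go via the gate */
        pt_free(pte, pagesz);
        return 0;
}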
set_iee_page_invalid(iee_addr); + iee_set_logical_mem_rw((unsigned long)pmd); + #endif + pagetable_free(ptdesc); } #endif @@ -190,7 +235,16 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) static inline void __pud_free(struct mm_struct *mm, pud_t *pud) { + #ifdef CONFIG_PTP + unsigned long iee_addr; + #endif + BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); + #ifdef CONFIG_PTP + iee_addr = __phys_to_iee(__pa(pud)); + set_iee_page_invalid(iee_addr); + iee_set_logical_mem_rw((unsigned long)pud); + #endif pagetable_free(virt_to_ptdesc(pud)); } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 200853042fc7..9d733afced53 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -346,6 +346,17 @@ KEEP(*(.dtb.init.rodata)) \ __dtb_end = .; +#ifdef CONFIG_KOI +#define KOI_DATA() \ + . = ALIGN(PAGE_SIZE); \ + __koi_data_start = .; \ + *(.data..koi) \ + . = ALIGN(PAGE_SIZE); \ + __koi_data_end = .; +#else +#define KOI_DATA() +#endif + /* * .data section */ @@ -370,8 +381,8 @@ BRANCH_PROFILE() \ TRACE_PRINTKS() \ BPF_RAW_TP() \ - TRACEPOINT_STR() - + TRACEPOINT_STR() \ + KOI_DATA() /* * Data section helpers */ @@ -1093,6 +1104,14 @@ * They will fit only a subset of the architectures */ +#ifdef CONFIG_CREDP + #define CRED_DATA \ + . = ALIGN(PAGE_SIZE); \ + *(.iee.cred) \ + . = ALIGN(PAGE_SIZE); +#else + #define CRED_DATA +#endif /* * Writeable data. @@ -1110,6 +1129,7 @@ . = ALIGN(PAGE_SIZE); \ .data : AT(ADDR(.data) - LOAD_OFFSET) { \ INIT_TASK_DATA(inittask) \ + CRED_DATA \ NOSAVE_DATA \ PAGE_ALIGNED_DATA(pagealigned) \ CACHELINE_ALIGNED_DATA(cacheline) \ diff --git a/include/linux/cred.h b/include/linux/cred.h index e01c6d094a30..cceb4842b619 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -18,6 +18,10 @@ #include <linux/sched/user.h> #include <linux/kabi.h> +#ifdef CONFIG_CREDP +#include <asm/iee-def.h> +#endif + struct cred; struct inode; @@ -153,6 +157,22 @@ struct cred { KABI_RESERVE(4) } __randomize_layout; +#ifdef CONFIG_CREDP +extern unsigned long long iee_rw_gate(int flag, ...); +static void iee_set_cred_non_rcu(struct cred *cred, int non_rcu) +{ + iee_rw_gate(IEE_OP_SET_CRED_NON_RCU,cred,non_rcu); + *(int *)(&(((struct rcu_head *)(cred->rcu.func))->next)) = non_rcu; +} + +static bool noinline iee_set_cred_atomic_op_usage(struct cred *cred, int flag, int nr) +{ + bool ret; + ret = iee_rw_gate(IEE_OP_SET_CRED_ATOP_USAGE,cred,flag,nr); + return ret; +} +#endif + extern void __put_cred(struct cred *); extern void exit_creds(struct task_struct *); extern int copy_creds(struct task_struct *, unsigned long); @@ -189,7 +209,11 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) */ static inline struct cred *get_new_cred_many(struct cred *cred, int nr) { + #ifdef CONFIG_CREDP + iee_set_cred_atomic_op_usage(cred, AT_ADD, nr); + #else atomic_long_add(nr, &cred->usage); + #endif return cred; } @@ -202,7 +226,7 @@ static inline struct cred *get_new_cred_many(struct cred *cred, int nr) */ static inline struct cred *get_new_cred(struct cred *cred) { - return get_new_cred_many(cred, 1); + return get_new_cred_many(cred, 1); // XXXzgc atomic_inc -> get_new_cred_many } /** @@ -224,7 +248,11 @@ static inline const struct cred *get_cred_many(const struct cred *cred, int nr) struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return cred; + #ifdef CONFIG_CREDP + iee_set_cred_non_rcu(nonconst_cred,0); + #else nonconst_cred->non_rcu = 0; + #endif return 
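/*
 * Annotation: with CONFIG_CREDP the cred.h call sites in this region stop
 * touching cred->usage with atomic_long_*() directly; each operation is
 * encoded as an opcode (AT_ADD, AT_INC_NOT_ZERO, AT_SUB_AND_TEST) and run
 * behind iee_set_cred_atomic_op_usage(), which hands back the predicate
 * the caller needs. A compact user-space model of that dispatch (opcode
 * names mirror the patch; the semantics are assumed from the callers):
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

enum { AT_ADD, AT_INC_NOT_ZERO, AT_SUB_AND_TEST };

static bool toy_atomic_op_usage(atomic_long *usage, int op, long nr)
{
        long old;

        switch (op) {
        case AT_ADD:                    /* get_new_cred_many() */
                atomic_fetch_add(usage, nr);
                return true;
        case AT_INC_NOT_ZERO:           /* get_cred_rcu() */
                old = atomic_load(usage);
                do {
                        if (old == 0)
                                return false;
                } while (!atomic_compare_exchange_weak(usage, &old, old + 1));
                return true;
        case AT_SUB_AND_TEST:           /* put_cred_many() */
                return atomic_fetch_sub(usage, nr) - nr == 0;
        }
        return false;
}

int main(void)
{
        atomic_long usage = 1;

        toy_atomic_op_usage(&usage, AT_ADD, 1);
        if (toy_atomic_op_usage(&usage, AT_SUB_AND_TEST, 2))
                puts("last reference gone -> __put_cred()");
        return 0;
}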
get_new_cred_many(nonconst_cred, nr); } @@ -247,9 +275,19 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred) struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return NULL; + #ifdef CONFIG_CREDP + if (!iee_set_cred_atomic_op_usage(nonconst_cred,AT_INC_NOT_ZERO,0)) + return NULL; + #else if (!atomic_long_inc_not_zero(&nonconst_cred->usage)) return NULL; + #endif + + #ifdef CONFIG_CREDP + iee_set_cred_non_rcu(nonconst_cred,0); + #else nonconst_cred->non_rcu = 0; + #endif return cred; } @@ -270,8 +308,13 @@ static inline void put_cred_many(const struct cred *_cred, int nr) struct cred *cred = (struct cred *) _cred; if (cred) { + #ifdef CONFIG_CREDP + if (iee_set_cred_atomic_op_usage(cred,AT_SUB_AND_TEST,nr)) + __put_cred(cred); + #else if (atomic_long_sub_and_test(nr, &cred->usage)) __put_cred(cred); + #endif } } diff --git a/include/linux/efi.h b/include/linux/efi.h index 9ed79128458c..970cc4f7068b 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -740,6 +740,15 @@ extern int __init __efi_memmap_init(struct efi_memory_map_data *data); extern int __init efi_memmap_init_early(struct efi_memory_map_data *data); extern int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size); extern void __init efi_memmap_unmap(void); +#ifdef CONFIG_PTP +extern void __init efi_memmap_unmap_after_init(void); +#endif +extern int __init efi_memmap_install(struct efi_memory_map_data *data); +extern int __init efi_memmap_split_count(efi_memory_desc_t *md, + struct range *range); +extern void __init efi_memmap_insert(struct efi_memory_map *old_memmap, + void *buf, struct efi_mem_range *mem); +extern void __init efi_print_memmap(void); #ifdef CONFIG_EFI_ESRT extern void __init efi_esrt_init(void); diff --git a/include/linux/iee-func.h b/include/linux/iee-func.h new file mode 100644 index 000000000000..79171de67c2a --- /dev/null +++ b/include/linux/iee-func.h @@ -0,0 +1,27 @@ +#ifndef _LINUX_IEE_FUNC_H +#define _LINUX_IEE_FUNC_H + +#ifdef CONFIG_IEE +// Declare the __entry_task. 
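/*
 * Annotation: the declaration just below pads what is logically a single
 * per-CPU struct task_struct pointer out to PAGE_SIZE/sizeof(ptr) entries,
 * so the pointer owns a whole page. That lets start_kernel() and smpboot
 * map exactly that page into the IEE region and seal it read-only in the
 * linear map without dragging unrelated per-CPU data along. The sizing
 * trick in isolation (user-space model, 4 KiB pages assumed):
 */
#include <stdio.h>

#define TOY_PAGE_SIZE 4096UL

struct task;    /* opaque stand-in for struct task_struct */

/* one live pointer in slot 0, padded to exactly one page */
static struct task *entry_task[TOY_PAGE_SIZE / sizeof(struct task *)]
        __attribute__((aligned(TOY_PAGE_SIZE)));

int main(void)
{
        printf("entry_task spans %zu bytes; only slot 0 is used\n",
               sizeof(entry_task));
        return 0;
}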
+__attribute__((aligned(PAGE_SIZE))) DECLARE_PER_CPU(struct task_struct *[PAGE_SIZE/sizeof(struct task_struct *)], __entry_task); + +extern unsigned long long iee_rw_gate(int flag, ...); +extern u32 get_cpu_asid_bits(void); +extern unsigned long arm64_mm_context_get(struct mm_struct *mm); +extern void set_iee_page_valid(unsigned long addr); +extern void set_iee_page_invalid(unsigned long addr); +extern void iee_set_logical_mem_ro(unsigned long addr); +extern void iee_set_logical_mem_rw(unsigned long addr); +extern void iee_set_token_mm(struct task_struct *tsk, struct mm_struct *mm); +extern void iee_set_token_pgd(struct task_struct *tsk, pgd_t *pgd); +extern void iee_init_token(struct task_struct *tsk, void *kernel_stack, void *iee_stack); +extern void iee_free_token(struct task_struct *tsk); +extern unsigned long iee_read_token_stack(struct task_struct *tsk); +extern void iee_set_token_page_valid(void *token, void *new); +extern void iee_set_token_page_invalid(void *token); +extern void iee_set_kernel_ppage(unsigned long addr); +extern void iee_set_kernel_upage(unsigned long addr); +extern void iee_write_in_byte(void *ptr, u64 data, int length); +#endif + +#endif \ No newline at end of file diff --git a/include/linux/module.h b/include/linux/module.h index 4db2878d9e42..ef8d51994017 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -606,6 +606,7 @@ struct module { KABI_RESERVE(2) KABI_RESERVE(3) KABI_RESERVE(4) + } ____cacheline_aligned __randomize_layout; #ifndef MODULE_ARCH_INIT #define MODULE_ARCH_INIT {} diff --git a/include/linux/sched.h b/include/linux/sched.h index f40411aa7b70..297becfbc8e3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -773,6 +773,24 @@ struct task_struct_resvd { struct task_struct *task; }; +#if defined(CONFIG_IEE) || defined(CONFIG_KOI) +struct task_token { +#ifdef CONFIG_IEE + struct mm_struct *mm; /* VA */ + pgd_t *pgd; /* Logical VA */ + void *iee_stack; /* VA */ + bool valid; + void *kernel_stack; /* VA */ +#endif +#ifdef CONFIG_KOI + void *koi_kernel_stack; /* VA */ + void *koi_stack; /* VA */ + void *koi_stack_base; /* VA */ + unsigned long current_ttbr1; +#endif +}; +#endif + struct task_struct { #ifdef CONFIG_THREAD_INFO_IN_TASK /* @@ -795,6 +813,7 @@ struct task_struct { randomized_struct_fields_start void *stack; + refcount_t usage; /* Per task flags (PF_*), defined further below: */ unsigned int flags; diff --git a/init/main.c b/init/main.c index 803332dd3d90..0f8d6e2744c2 100644 --- a/init/main.c +++ b/init/main.c @@ -102,6 +102,12 @@ #include <linux/randomize_kstack.h> #include <net/net_namespace.h> +#ifdef CONFIG_IEE +#include <linux/iee-func.h> +#include <asm/iee-si.h> +#include <linux/stop_machine.h> +#endif + #include <asm/io.h> #include <asm/setup.h> #include <asm/sections.h> @@ -112,6 +118,10 @@ #include <kunit/test.h> +#ifdef CONFIG_PTP +extern void *bm_pte_addr; +#endif + static int kernel_init(void *); /* @@ -880,6 +890,9 @@ void start_kernel(void) { char *command_line; char *after_dashes; + #ifdef CONFIG_IEE + unsigned int cpu; + #endif set_task_stack_end_magic(&init_task); smp_setup_processor_id(); @@ -904,6 +917,16 @@ void start_kernel(void) setup_command_line(command_line); setup_nr_cpu_ids(); setup_per_cpu_areas(); + #ifdef CONFIG_IEE + for_each_possible_cpu(cpu) + { + // Map the __entry_task to IEE. + set_iee_page_valid((unsigned long)__phys_to_iee(__pa(SHIFT_PERCPU_PTR(__entry_task,__per_cpu_offset[cpu])))); + // Set the __entry_task of cpu 0 readonly in lm. 
+ if(cpu == smp_processor_id()) + iee_set_logical_mem_ro((unsigned long)SHIFT_PERCPU_PTR(__entry_task,__per_cpu_offset[cpu])); + } + #endif smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ boot_cpu_hotplug_init(); @@ -1446,6 +1469,9 @@ static int __ref kernel_init(void *unused) wait_for_completion(&kthreadd_done); kernel_init_freeable(); + #ifdef CONFIG_PTP + iee_set_logical_mem_ro((unsigned long)bm_pte_addr); + #endif /* need to finish all async __init code before freeing the memory */ async_synchronize_full(); @@ -1462,7 +1488,7 @@ static int __ref kernel_init(void *unused) * to finalize PTI. */ pti_finalize(); - + system_state = SYSTEM_RUNNING; numa_default_policy(); diff --git a/kernel/cred.c b/kernel/cred.c index c033a201c808..2e44530976d5 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -20,6 +20,11 @@ #include <linux/cn_proc.h> #include <linux/uidgid.h> +#ifdef CONFIG_CREDP +#include <asm/iee-cred.h> +#include <linux/iee-func.h> +#endif + #if 0 #define kdebug(FMT, ...) \ printk("[%-5.5s%5u] " FMT "\n", \ @@ -34,6 +39,9 @@ do { \ #endif static struct kmem_cache *cred_jar; +#ifdef CONFIG_CREDP +static struct kmem_cache *rcu_jar; +#endif /* init to 2 - one for init_task, one to ensure it is never freed */ static struct group_info init_groups = { .usage = REFCOUNT_INIT(2) }; @@ -41,6 +49,32 @@ static struct group_info init_groups = { .usage = REFCOUNT_INIT(2) }; /* * The initial credentials for the initial task */ +#ifdef CONFIG_CREDP +struct cred init_cred __section(".iee.cred") = { + .usage = ATOMIC_INIT(4), +#ifdef CONFIG_DEBUG_CREDENTIALS + .subscribers = ATOMIC_INIT(2), + .magic = CRED_MAGIC, +#endif + .uid = GLOBAL_ROOT_UID, + .gid = GLOBAL_ROOT_GID, + .suid = GLOBAL_ROOT_UID, + .sgid = GLOBAL_ROOT_GID, + .euid = GLOBAL_ROOT_UID, + .egid = GLOBAL_ROOT_GID, + .fsuid = GLOBAL_ROOT_UID, + .fsgid = GLOBAL_ROOT_GID, + .securebits = SECUREBITS_DEFAULT, + .cap_inheritable = CAP_EMPTY_SET, + .cap_permitted = CAP_FULL_SET, + .cap_effective = CAP_FULL_SET, + .cap_bset = CAP_FULL_SET, + .user = INIT_USER, + .user_ns = &init_user_ns, + .group_info = &init_groups, + .ucounts = &init_ucounts, +}; +#else struct cred init_cred = { .usage = ATOMIC_INIT(4), .uid = GLOBAL_ROOT_UID, @@ -61,13 +95,43 @@ struct cred init_cred = { .group_info = &init_groups, .ucounts = &init_ucounts, }; +#endif + +static inline void set_cred_subscribers(struct cred *cred, int n) +{ +#ifdef CONFIG_DEBUG_CREDENTIALS + atomic_set(&cred->subscribers, n); +#endif +} + +static inline int read_cred_subscribers(const struct cred *cred) +{ +#ifdef CONFIG_DEBUG_CREDENTIALS + return atomic_read(&cred->subscribers); +#else + return 0; +#endif +} + +static inline void alter_cred_subscribers(const struct cred *_cred, int n) +{ +#ifdef CONFIG_DEBUG_CREDENTIALS + struct cred *cred = (struct cred *) _cred; + + atomic_add(n, &cred->subscribers); +#endif +} /* * The RCU callback to actually dispose of a set of credentials */ static void put_cred_rcu(struct rcu_head *rcu) { + #ifdef CONFIG_CREDP + struct cred *cred = *(struct cred **)(rcu + 1); + #else struct cred *cred = container_of(rcu, struct cred, rcu); + #endif kdebug("put_cred_rcu(%p)", cred); @@ -86,6 +150,9 @@ static void put_cred_rcu(struct rcu_head *rcu) if (cred->ucounts) put_ucounts(cred->ucounts); put_user_ns(cred->user_ns); + #ifdef CONFIG_CREDP + kmem_cache_free(rcu_jar, (struct rcu_head *)(cred->rcu.func)); + #endif kmem_cache_free(cred_jar, cred); } @@ -104,10 +171,22 @@ void __put_cred(struct cred *cred) BUG_ON(cred == current->cred); BUG_ON(cred == 
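/*
 * Annotation: because a CONFIG_CREDP cred is read-only, the RCU machinery
 * can no longer write into the rcu_head embedded in struct cred. The
 * kernel/cred.c hunks here therefore allocate a companion object from
 * rcu_jar -- an rcu_head immediately followed by a back-pointer -- park its
 * address in cred->rcu.func, and let put_cred_rcu() recover the cred from
 * the word after the head instead of using container_of(). A user-space
 * model of that layout and recovery:
 */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

/* func is a function pointer in the kernel; the patch reuses the slot as
 * an untyped pointer, modeled here as void * for simplicity */
struct toy_rcu_head { void *next; void *func; };
struct toy_cred { struct toy_rcu_head rcu; long usage; };

/* rcu_jar object: an rcu_head immediately followed by a back-pointer */
struct toy_cookie { struct toy_rcu_head head; struct toy_cred *cred; };

static void toy_put_cred_rcu(struct toy_rcu_head *rcu)
{
        /* mirrors "*(struct cred **)(rcu + 1)" in the patched put_cred_rcu() */
        struct toy_cred *cred = *(struct toy_cred **)(rcu + 1);

        printf("freeing cred %p through its cookie\n", (void *)cred);
        free(rcu);
        free(cred);
}

int main(void)
{
        struct toy_cred *cred = calloc(1, sizeof(*cred));
        struct toy_cookie *cookie = calloc(1, sizeof(*cookie));

        assert(cred && cookie);
        cookie->cred = cred;
        cred->rcu.func = cookie;        /* writable state stays outside the cred */
        toy_put_cred_rcu(cred->rcu.func);
        return 0;
}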
current->real_cred); + #ifdef CONFIG_CREDP + if (*(int *)(&(((struct rcu_head *)(cred->rcu.func))->next))) + #else if (cred->non_rcu) + #endif + #ifdef CONFIG_CREDP + put_cred_rcu((struct rcu_head *)(cred->rcu.func)); + #else put_cred_rcu(&cred->rcu); + #endif else + #ifdef CONFIG_CREDP + call_rcu((struct rcu_head *)(cred->rcu.func), put_cred_rcu); + #else call_rcu(&cred->rcu, put_cred_rcu); + #endif } EXPORT_SYMBOL(__put_cred); @@ -178,7 +257,18 @@ struct cred *cred_alloc_blank(void) if (!new) return NULL; + #ifdef CONFIG_CREDP + iee_set_cred_rcu(new,kmem_cache_zalloc(rcu_jar, GFP_KERNEL)); + *(struct cred **)(((struct rcu_head *)(new->rcu.func)) + 1) = new; + iee_set_cred_atomic_set_usage(new,1); + #else atomic_long_set(&new->usage, 1); + #endif + + #ifdef CONFIG_DEBUG_CREDENTIALS + new->magic = CRED_MAGIC; + #endif + if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0) goto error; @@ -213,13 +303,25 @@ struct cred *prepare_creds(void) if (!new) return NULL; + #ifdef CONFIG_CREDP + iee_set_cred_rcu(new,kmem_cache_alloc(rcu_jar, GFP_KERNEL)); + *(struct cred **)(((struct rcu_head *)(new->rcu.func)) + 1) = new; + #endif + kdebug("prepare_creds() alloc %p", new); old = task->cred; + #ifdef CONFIG_CREDP + iee_copy_cred(old,new); + + iee_set_cred_non_rcu(new,0); + iee_set_cred_atomic_set_usage(new,1); + #else memcpy(new, old, sizeof(struct cred)); new->non_rcu = 0; atomic_long_set(&new->usage, 1); + #endif get_group_info(new->group_info); get_uid(new->user); get_user_ns(new->user_ns); @@ -232,10 +334,18 @@ struct cred *prepare_creds(void) #endif #ifdef CONFIG_SECURITY +#ifdef CONFIG_CREDP + iee_set_cred_security(new,NULL); +#else new->security = NULL; +#endif #endif + #ifdef CONFIG_CREDP + iee_set_cred_ucounts(new, get_ucounts(new->ucounts)); + #else new->ucounts = get_ucounts(new->ucounts); + #endif if (!new->ucounts) goto error; @@ -265,15 +375,30 @@ struct cred *prepare_exec_creds(void) #ifdef CONFIG_KEYS /* newly exec'd tasks don't get a thread keyring */ key_put(new->thread_keyring); + #ifdef CONFIG_CREDP + iee_set_cred_thread_keyring(new,NULL); + #else new->thread_keyring = NULL; + #endif /* inherit the session keyring; new process keyring */ key_put(new->process_keyring); + #ifdef CONFIG_CREDP + iee_set_cred_process_keyring(new,NULL); + #else new->process_keyring = NULL; + #endif #endif + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new,new->euid); + iee_set_cred_suid(new,new->euid); + iee_set_cred_fsgid(new,new->egid); + iee_set_cred_sgid(new,new->egid); + #else new->suid = new->fsuid = new->euid; new->sgid = new->fsgid = new->egid; + #endif return new; } @@ -327,7 +452,11 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) * had one */ if (new->thread_keyring) { key_put(new->thread_keyring); + #ifdef CONFIG_CREDP + iee_set_cred_thread_keyring(new,NULL); + #else new->thread_keyring = NULL; + #endif if (clone_flags & CLONE_THREAD) install_thread_keyring_to_cred(new); } @@ -337,7 +466,11 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) */ if (!(clone_flags & CLONE_THREAD)) { key_put(new->process_keyring); + #ifdef CONFIG_CREDP + iee_set_cred_process_keyring(new,NULL); + #else new->process_keyring = NULL; + #endif } #endif @@ -594,7 +727,11 @@ int set_cred_ucounts(struct cred *new) if (!(new_ucounts = alloc_ucounts(new->user_ns, new->uid))) return -EAGAIN; + #ifdef CONFIG_CREDP + iee_set_cred_ucounts(new, new_ucounts); + #else new->ucounts = new_ucounts; + #endif put_ucounts(old_ucounts); return 0; @@ -606,8 +743,21 @@ int set_cred_ucounts(struct 
cred *new) void __init cred_init(void) { /* allocate a slab in which we can store credentials */ + #ifdef CONFIG_CREDP cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT|SLAB_RED_ZONE, NULL); + rcu_jar = kmem_cache_create("rcu_jar", sizeof(struct rcu_head) + sizeof(struct cred *), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); + // Map init_cred + *((struct rcu_head **)(&(init_cred.rcu.func))) = (struct rcu_head *)kmem_cache_zalloc(rcu_jar, GFP_KERNEL); + *(struct cred **)(((struct rcu_head *)(init_cred.rcu.func)) + 1) = &init_cred; + set_iee_page_valid(__phys_to_iee(__pa_symbol(&init_cred))); + iee_set_logical_mem_ro((unsigned long)&init_cred); + iee_set_logical_mem_ro((unsigned long)__va(__pa_symbol(&init_cred))); + #else + cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); + #endif } /** @@ -638,29 +788,56 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) if (!new) return NULL; + #ifdef CONFIG_CREDP + iee_set_cred_rcu(new,kmem_cache_alloc(rcu_jar, GFP_KERNEL)); + *(struct cred **)(((struct rcu_head *)(new->rcu.func)) + 1) = new; + #endif + kdebug("prepare_kernel_cred() alloc %p", new); old = get_task_cred(daemon); + #ifdef CONFIG_CREDP + iee_copy_cred(old,new); + iee_set_cred_non_rcu(new,0); + iee_set_cred_atomic_set_usage(new,1); + #else *new = *old; new->non_rcu = 0; atomic_long_set(&new->usage, 1); + #endif get_uid(new->user); get_user_ns(new->user_ns); get_group_info(new->group_info); #ifdef CONFIG_KEYS +#ifdef CONFIG_CREDP + iee_set_cred_session_keyring(new,NULL); + iee_set_cred_process_keyring(new,NULL); + iee_set_cred_thread_keyring(new,NULL); + iee_set_cred_request_key_auth(new,NULL); + iee_set_cred_jit_keyring(new,KEY_REQKEY_DEFL_THREAD_KEYRING); +#else new->session_keyring = NULL; new->process_keyring = NULL; new->thread_keyring = NULL; new->request_key_auth = NULL; new->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; #endif +#endif #ifdef CONFIG_SECURITY +#ifdef CONFIG_CREDP + iee_set_cred_security(new,NULL); +#else new->security = NULL; #endif +#endif + #ifdef CONFIG_CREDP + iee_set_cred_ucounts(new, get_ucounts(new->ucounts)); + #else new->ucounts = get_ucounts(new->ucounts); + #endif if (!new->ucounts) goto error; @@ -727,8 +904,13 @@ int set_create_files_as(struct cred *new, struct inode *inode) { if (!uid_valid(inode->i_uid) || !gid_valid(inode->i_gid)) return -EINVAL; + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new,inode->i_uid); + iee_set_cred_fsgid(new,inode->i_gid); + #else new->fsuid = inode->i_uid; new->fsgid = inode->i_gid; + #endif return security_kernel_create_files_as(new, inode); } EXPORT_SYMBOL(set_create_files_as); diff --git a/kernel/exit.c b/kernel/exit.c index 21a59a6e1f2e..d21a109f0497 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -74,6 +74,10 @@ #include <asm/unistd.h> #include <asm/mmu_context.h> +#ifdef CONFIG_IEE +#include <linux/iee-func.h> +#endif + /* * The default value should be high enough to not crash a system that randomly * crashes its kernel from time to time, but low enough to at least not permit @@ -558,6 +562,10 @@ static void exit_mm(void) smp_mb__after_spinlock(); local_irq_disable(); current->mm = NULL; + #ifdef CONFIG_IEE + iee_set_token_mm(current, NULL); + iee_set_token_pgd(current, NULL); + #endif membarrier_update_current_mm(NULL); enter_lazy_tlb(mm, current); local_irq_enable(); diff --git a/kernel/fork.c b/kernel/fork.c index e033388b11bd..c93e18a4f0b3 100644 --- a/kernel/fork.c +++ 
b/kernel/fork.c @@ -115,6 +115,10 @@ #define CREATE_TRACE_POINTS #include <trace/events/task.h> +#ifdef CONFIG_IEE +#include <linux/iee-func.h> +#endif + /* * Minimum number of threads to boot the kernel */ @@ -128,14 +132,14 @@ /* * Protected counters by write_lock_irq(&tasklist_lock) */ -unsigned long total_forks; /* Handle normal Linux uptimes. */ -int nr_threads; /* The idle threads do not count.. */ +unsigned long total_forks; /* Handle normal Linux uptimes. */ +int nr_threads; /* The idle threads do not count.. */ -static int max_threads; /* tunable limit on nr_threads */ +static int max_threads; /* tunable limit on nr_threads */ -#define NAMED_ARRAY_INDEX(x) [x] = __stringify(x) +#define NAMED_ARRAY_INDEX(x) [x] = __stringify(x) -static const char * const resident_page_types[] = { +static const char *const resident_page_types[] = { NAMED_ARRAY_INDEX(MM_FILEPAGES), NAMED_ARRAY_INDEX(MM_ANONPAGES), NAMED_ARRAY_INDEX(MM_SWAPENTS), @@ -144,7 +148,7 @@ static const char * const resident_page_types[] = { DEFINE_PER_CPU(unsigned long, process_counts) = 0; -__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ +__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ #ifdef CONFIG_PROVE_RCU int lockdep_tasklist_lock_is_held(void) @@ -159,7 +163,7 @@ int nr_processes(void) int cpu; int total = 0; - for_each_possible_cpu(cpu) + for_each_possible_cpu (cpu) total += per_cpu(process_counts, cpu); return total; @@ -190,7 +194,7 @@ static inline void free_task_struct(struct task_struct *tsk) * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a * kmemcache based allocator. */ -# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) +#if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) # ifdef CONFIG_VMAP_STACK /* @@ -311,8 +315,8 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node) * so memcg accounting is performed manually on assigning/releasing * stacks to tasks. Drop __GFP_ACCOUNT. */ - stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN, - VMALLOC_START, VMALLOC_END, + stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN, VMALLOC_START, + VMALLOC_END, THREADINFO_GFP & ~__GFP_ACCOUNT, PAGE_KERNEL, 0, node, __builtin_return_address(0)); @@ -410,9 +414,10 @@ static void free_thread_stack(struct task_struct *tsk) void thread_stack_cache_init(void) { - thread_stack_cache = kmem_cache_create_usercopy("thread_stack", - THREAD_SIZE, THREAD_SIZE, 0, 0, - THREAD_SIZE, NULL); + thread_stack_cache = + kmem_cache_create_usercopy("thread_stack", THREAD_SIZE, + THREAD_SIZE, 0, 0, THREAD_SIZE, + NULL); BUG_ON(thread_stack_cache == NULL); } @@ -502,7 +507,8 @@ struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) { - struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); + struct vm_area_struct *new = + kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!new) return NULL; @@ -602,8 +608,15 @@ void put_task_stack(struct task_struct *tsk) } #endif +#ifdef CONFIG_KOI +extern s64 koi_offset; +#endif + void free_task(struct task_struct *tsk) { + #ifdef CONFIG_IEE + void *iee_stack; + #endif #ifdef CONFIG_SECCOMP WARN_ON_ONCE(tsk->seccomp.filter); #endif @@ -633,6 +646,45 @@ void free_task(struct task_struct *tsk) if (dynamic_affinity_enabled()) sched_prefer_cpus_free(tsk); #endif +#ifdef CONFIG_IEE + // Free iee stack. 
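/*
 * Annotation: the IEE branch that follows frees the per-task IEE stack by
 * undoing the fixed offset used at allocation time: copy_process() grabs
 * an order-3 block (eight pages), records base + 4*PAGE_SIZE in the task
 * token, and free_task() subtracts the same 4*PAGE_SIZE before calling
 * free_pages(..., 3). The offset arithmetic in isolation (user-space
 * model; the patch does not spell out why the recorded pointer sits
 * mid-block, so the split shown is only the observable convention):
 */
#include <assert.h>
#include <stdlib.h>

#define TOY_PAGE_SIZE 4096UL
#define TOY_ORDER 3     /* 1 << 3 pages, as in __get_free_pages(gfp, 3) */

int main(void)
{
        /* copy_process(): allocate the block, record base + 4 pages */
        char *pstack = aligned_alloc(TOY_PAGE_SIZE,
                                     TOY_PAGE_SIZE << TOY_ORDER);
        char *token_stack = pstack + TOY_PAGE_SIZE * 4;

        assert(pstack);
        /* free_task(): recover the block base from the token and free it */
        free(token_stack - TOY_PAGE_SIZE * 4);
        return 0;
}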
+ iee_stack = (void *)iee_read_token_stack(tsk); + if (iee_stack) { + iee_set_kernel_ppage( + (unsigned long)(iee_stack - PAGE_SIZE * 4)); + free_pages((unsigned long)(iee_stack - PAGE_SIZE * 4), 3); + } + // Free task_token. + // Empty the token + iee_free_token(tsk); + +#ifdef CONFIG_KOI + // Free koi stack. + unsigned long koi_stack = iee_rw_gate(IEE_READ_KOI_STACK_BASE, current); + if (koi_stack != 0) + free_pages(koi_stack, 2); +#endif +#else +#ifdef CONFIG_KOI +// free koi stack + struct task_token *token = (struct task_token *)((unsigned long)current + koi_offset); + unsigned long flags; + local_irq_save(flags); + asm volatile( + "at s1e1r, %0\n" + "isb\n" + : + :"r"(token)); + unsigned long res = read_sysreg(par_el1); + local_irq_restore(flags); + if (!(res & 0x1)) { + unsigned long koi_stack = token->koi_stack_base; + if (koi_stack != 0) + free_pages(koi_stack, 2); + } +#endif +#endif + #ifdef CONFIG_QOS_SCHED_SMART_GRID if (smart_grid_enabled()) sched_grid_qos_free(tsk); @@ -657,7 +709,7 @@ static void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm) #ifdef CONFIG_MMU static __latent_entropy int dup_mmap(struct mm_struct *mm, - struct mm_struct *oldmm) + struct mm_struct *oldmm) { struct vm_area_struct *mpnt, *tmp; int retval; @@ -773,7 +825,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, flush_dcache_mmap_lock(mapping); /* insert tmp into the share list, just after mpnt */ vma_interval_tree_insert_after(tmp, mpnt, - &mapping->i_mmap); + &mapping->i_mmap); flush_dcache_mmap_unlock(mapping); i_mmap_unlock_write(mapping); } @@ -842,7 +894,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) mmap_write_unlock(oldmm); return 0; } -#define mm_alloc_pgd(mm) (0) +#define mm_alloc_pgd(mm) (0) #define mm_free_pgd(mm) #endif /* CONFIG_MMU */ @@ -850,20 +902,22 @@ static void check_mm(struct mm_struct *mm) { int i; - BUILD_BUG_ON_MSG(ARRAY_SIZE(resident_page_types) != NR_MM_COUNTERS, - "Please make sure 'struct resident_page_types[]' is updated as well"); + BUILD_BUG_ON_MSG( + ARRAY_SIZE(resident_page_types) != NR_MM_COUNTERS, + "Please make sure 'struct resident_page_types[]' is updated as well"); for (i = 0; i < NR_MM_COUNTERS; i++) { long x = mm_counter_sum(mm, i); if (unlikely(x)) - pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n", - mm, resident_page_types[i], x); + pr_alert( + "BUG: Bad rss-counter state mm:%p type:%s val:%ld\n", + mm, resident_page_types[i], x); } if (mm_pgtables_bytes(mm)) pr_alert("BUG: non-zero pgtables_bytes on freeing mm: %ld\n", - mm_pgtables_bytes(mm)); + mm_pgtables_bytes(mm)); #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS VM_BUG_ON_MM(mm->pmd_huge_pte, mm); @@ -1014,14 +1068,6 @@ void __put_task_struct(struct task_struct *tsk) } EXPORT_SYMBOL_GPL(__put_task_struct); -void __put_task_struct_rcu_cb(struct rcu_head *rhp) -{ - struct task_struct *task = container_of(rhp, struct task_struct, rcu); - - __put_task_struct(task); -} -EXPORT_SYMBOL_GPL(__put_task_struct_rcu_cb); - void __init __weak arch_task_cache_init(void) { } /* @@ -1039,8 +1085,8 @@ static void set_max_threads(unsigned int max_threads_suggested) if (fls64(nr_pages) + fls64(PAGE_SIZE) > 64) threads = MAX_THREADS; else - threads = div64_u64((u64) nr_pages * (u64) PAGE_SIZE, - (u64) THREAD_SIZE * 8UL); + threads = div64_u64((u64)nr_pages * (u64)PAGE_SIZE, + (u64)THREAD_SIZE * 8UL); if (threads > max_threads_suggested) threads = max_threads_suggested; @@ -1075,17 +1121,24 @@ void __init fork_init(void) int i; #ifndef 
CONFIG_ARCH_TASK_STRUCT_ALLOCATOR #ifndef ARCH_MIN_TASKALIGN -#define ARCH_MIN_TASKALIGN 0 +#define ARCH_MIN_TASKALIGN 0 #endif int align = max_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN); unsigned long useroffset, usersize; /* create a slab on which task_structs can be allocated */ task_struct_whitelist(&useroffset, &usersize); + #ifdef CONFIG_IEE task_struct_cachep = kmem_cache_create_usercopy("task_struct", arch_task_struct_size, align, - SLAB_PANIC|SLAB_ACCOUNT, + SLAB_PANIC|SLAB_ACCOUNT|SLAB_RED_ZONE, useroffset, usersize, NULL); + #else + task_struct_cachep = + kmem_cache_create_usercopy("task_struct", arch_task_struct_size, + align, SLAB_PANIC | SLAB_ACCOUNT, + useroffset, usersize, NULL); + #endif #endif /* do the arch specific task caches init */ @@ -1093,8 +1146,8 @@ void __init fork_init(void) set_max_threads(MAX_THREADS); - init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; - init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; + init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads / 2; + init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads / 2; init_task.signal->rlim[RLIMIT_SIGPENDING] = init_task.signal->rlim[RLIMIT_NPROC]; @@ -1107,8 +1160,8 @@ void __init fork_init(void) set_userns_rlimit_max(&init_user_ns, UCOUNT_RLIMIT_MEMLOCK, RLIM_INFINITY); #ifdef CONFIG_VMAP_STACK - cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", - NULL, free_vm_stack_cache); + cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", NULL, + free_vm_stack_cache); #endif scs_init(); @@ -1118,7 +1171,7 @@ void __init fork_init(void) } int __weak arch_dup_task_struct(struct task_struct *dst, - struct task_struct *src) + struct task_struct *src) { *dst = *src; return 0; @@ -1129,14 +1182,14 @@ void set_task_stack_end_magic(struct task_struct *tsk) unsigned long *stackend; stackend = end_of_stack(tsk); - *stackend = STACK_END_MAGIC; /* for overflow detection */ + *stackend = STACK_END_MAGIC; /* for overflow detection */ } static bool dup_resvd_task_struct(struct task_struct *dst, struct task_struct *orig, int node) { - dst->_resvd = kzalloc_node(sizeof(struct task_struct_resvd), - GFP_KERNEL, node); + dst->_resvd = kzalloc_node(sizeof(struct task_struct_resvd), GFP_KERNEL, + node); if (!dst->_resvd) return false; @@ -1309,7 +1362,7 @@ static void mm_init_uprobes_state(struct mm_struct *mm) } static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, - struct user_namespace *user_ns) + struct user_namespace *user_ns) { mt_init_flags(&mm->mm_mt, MM_MT_FLAGS); mt_set_external_lock(&mm->mm_mt, &mm->mmap_lock); @@ -1425,8 +1478,8 @@ EXPORT_SYMBOL_GPL(mmput); #ifdef CONFIG_MMU static void mmput_async_fn(struct work_struct *work) { - struct mm_struct *mm = container_of(work, struct mm_struct, - async_put_work); + struct mm_struct *mm = + container_of(work, struct mm_struct, async_put_work); __mmput(mm); } @@ -1602,13 +1655,12 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) struct mm_struct *mm; int err; - err = down_read_killable(&task->signal->exec_update_lock); + err = down_read_killable(&task->signal->exec_update_lock); if (err) return ERR_PTR(err); mm = get_task_mm(task); - if (mm && mm != current->mm && - !ptrace_may_access(task, mode)) { + if (mm && mm != current->mm && !ptrace_may_access(task, mode)) { mmput(mm); mm = ERR_PTR(-EACCES); } @@ -1631,7 +1683,7 @@ static void complete_vfork_done(struct task_struct *tsk) } static int wait_for_vfork_done(struct task_struct *child, - struct completion *vfork) + 
struct completion *vfork) { unsigned int state = TASK_UNINTERRUPTIBLE|TASK_KILLABLE|TASK_FREEZABLE; int killed; @@ -1682,8 +1734,8 @@ static void mm_release(struct task_struct *tsk, struct mm_struct *mm) * not set up a proper pointer then tough luck. */ put_user(0, tsk->clear_child_tid); - do_futex(tsk->clear_child_tid, FUTEX_WAKE, - 1, NULL, NULL, 0, 0); + do_futex(tsk->clear_child_tid, FUTEX_WAKE, 1, NULL, + NULL, 0, 0); } tsk->clear_child_tid = NULL; } @@ -1767,6 +1819,10 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) #endif tsk->mm = NULL; +#ifdef CONFIG_IEE + iee_set_token_mm(tsk, NULL); + iee_set_token_pgd(tsk, NULL); +#endif tsk->active_mm = NULL; /* @@ -1798,6 +1854,10 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) } tsk->mm = mm; +#ifdef CONFIG_IEE + iee_set_token_mm(tsk, mm); + iee_set_token_pgd(tsk, mm->pgd); +#endif tsk->active_mm = mm; sched_mm_cid_fork(tsk); return 0; @@ -2015,8 +2075,8 @@ static inline void init_task_pid_links(struct task_struct *task) INIT_HLIST_NODE(&task->pid_links[type]); } -static inline void -init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid) +static inline void init_task_pid(struct task_struct *task, enum pid_type type, + struct pid *pid) { if (type == PIDTYPE_PID) task->thread_pid = pid; @@ -2277,6 +2337,12 @@ static void copy_oom_score_adj(u64 clone_flags, struct task_struct *tsk) mutex_unlock(&oom_adj_mutex); } +#if defined(CONFIG_KOI) && !defined(CONFIG_IEE) +extern s64 koi_offset; +extern int koi_add_page_mapping(unsigned long dst, unsigned long src); +#endif + + #ifdef CONFIG_RV static void rv_task_fork(struct task_struct *p) { @@ -2309,15 +2375,21 @@ __latent_entropy struct task_struct *copy_process( struct file *pidfile = NULL; const u64 clone_flags = args->flags; struct nsproxy *nsp = current->nsproxy; + #ifdef CONFIG_IEE + gfp_t gfp; + void *pstack; + #endif /* * Don't allow sharing the root directory with processes in a different * namespace */ - if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) + if ((clone_flags & (CLONE_NEWNS | CLONE_FS)) == + (CLONE_NEWNS | CLONE_FS)) return ERR_PTR(-EINVAL); - if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) + if ((clone_flags & (CLONE_NEWUSER | CLONE_FS)) == + (CLONE_NEWUSER | CLONE_FS)) return ERR_PTR(-EINVAL); /* @@ -2342,7 +2414,7 @@ __latent_entropy struct task_struct *copy_process( * from creating siblings. */ if ((clone_flags & CLONE_PARENT) && - current->signal->flags & SIGNAL_UNKILLABLE) + current->signal->flags & SIGNAL_UNKILLABLE) return ERR_PTR(-EINVAL); /* @@ -2387,6 +2459,15 @@ __latent_entropy struct task_struct *copy_process( p = dup_task_struct(current, node); if (!p) goto fork_out; + #ifdef CONFIG_IEE + // Alloc iee stack. + gfp = GFP_KERNEL; + pstack = (void *)__get_free_pages(gfp, 3); + iee_set_kernel_upage((unsigned long)pstack); + // Init token. + iee_init_token(p, NULL, pstack + PAGE_SIZE * 4); + #endif + p->flags &= ~PF_KTHREAD; if (args->kthread) p->flags |= PF_KTHREAD; @@ -2408,7 +2489,8 @@ __latent_entropy struct task_struct *copy_process( /* * Clear TID on mm_release()? */ - p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? args->child_tid : NULL; + p->clear_child_tid = + (clone_flags & CLONE_CHILD_CLEARTID) ? 
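/*
 * Annotation: each place above that assigns tsk->mm now also pushes the
 * new mm (and its pgd) into the task's IEE token via iee_set_token_mm()
 * and iee_set_token_pgd(), so the protected side keeps its own record of
 * which address space a task runs in. A user-space model of keeping a
 * shadow record in lock-step through a single setter:
 */
#include <assert.h>
#include <stddef.h>

struct toy_mm { int id; };
struct toy_token { struct toy_mm *mm; };        /* writable only via the gate */
struct toy_task { struct toy_mm *mm; struct toy_token token; };

/* stand-in for the iee_set_token_mm() gate call */
static void toy_set_token_mm(struct toy_task *t, struct toy_mm *mm)
{
        t->token.mm = mm;
}

static void toy_assign_mm(struct toy_task *t, struct toy_mm *mm)
{
        t->mm = mm;              /* the ordinary kernel-side store */
        toy_set_token_mm(t, mm); /* and the mirrored, gated store  */
}

int main(void)
{
        struct toy_mm mm = { .id = 1 };
        struct toy_task task = { 0 };

        toy_assign_mm(&task, &mm);
        assert(task.mm == task.token.mm);       /* the invariant the hunks keep */
        toy_assign_mm(&task, NULL);             /* exit_mm() path */
        assert(task.token.mm == NULL);
        return 0;
}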
args->child_tid : NULL; ftrace_graph_init_task(p); @@ -2519,10 +2601,10 @@ __latent_entropy struct task_struct *copy_process( #endif #ifdef CONFIG_TRACE_IRQFLAGS memset(&p->irqtrace, 0, sizeof(p->irqtrace)); - p->irqtrace.hardirq_disable_ip = _THIS_IP_; - p->irqtrace.softirq_enable_ip = _THIS_IP_; - p->softirqs_enabled = 1; - p->softirq_context = 0; + p->irqtrace.hardirq_disable_ip = _THIS_IP_; + p->irqtrace.softirq_enable_ip = _THIS_IP_; + p->softirqs_enabled = 1; + p->softirq_context = 0; #endif p->pagefault_disabled = 0; @@ -2535,8 +2617,8 @@ __latent_entropy struct task_struct *copy_process( p->blocked_on = NULL; /* not blocked yet */ #endif #ifdef CONFIG_BCACHE - p->sequential_io = 0; - p->sequential_io_avg = 0; + p->sequential_io = 0; + p->sequential_io_avg = 0; #endif #ifdef CONFIG_BPF_SYSCALL RCU_INIT_POINTER(p->bpf_storage, NULL); @@ -2623,7 +2705,7 @@ __latent_entropy struct task_struct *copy_process( /* * sigaltstack should be cleared when sharing the same VM */ - if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) + if ((clone_flags & (CLONE_VM | CLONE_VFORK)) == CLONE_VM) sas_ss_reset(p); /* @@ -2702,7 +2784,7 @@ __latent_entropy struct task_struct *copy_process( write_lock_irq(&tasklist_lock); /* CLONE_PARENT re-uses the old parent */ - if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { + if (clone_flags & (CLONE_PARENT | CLONE_THREAD)) { p->real_parent = current->real_parent; p->parent_exec_id = current->parent_exec_id; if (clone_flags & CLONE_THREAD) @@ -2766,8 +2848,9 @@ __latent_entropy struct task_struct *copy_process( * tasklist_lock with adding child to the process tree * for propagate_has_child_subreaper optimization. */ - p->signal->has_child_subreaper = p->real_parent->signal->has_child_subreaper || - p->real_parent->signal->is_child_subreaper; + p->signal->has_child_subreaper = + p->real_parent->signal->has_child_subreaper || + p->real_parent->signal->is_child_subreaper; list_add_tail(&p->sibling, &p->real_parent->children); list_add_tail_rcu(&p->tasks, &init_task.tasks); attach_pid(p, PIDTYPE_TGID); @@ -2918,8 +3001,8 @@ struct task_struct * __init fork_idle(int cpu) */ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node) { - unsigned long flags = CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD| - CLONE_IO; + unsigned long flags = CLONE_FS | CLONE_FILES | CLONE_SIGHAND | + CLONE_THREAD | CLONE_IO; struct kernel_clone_args args = { .flags = ((lower_32_bits(flags) | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL), @@ -3083,8 +3166,8 @@ SYSCALL_DEFINE0(fork) SYSCALL_DEFINE0(vfork) { struct kernel_clone_args args = { - .flags = CLONE_VFORK | CLONE_VM, - .exit_signal = SIGCHLD, + .flags = CLONE_VFORK | CLONE_VM, + .exit_signal = SIGCHLD, }; return kernel_clone(&args); @@ -3094,35 +3177,30 @@ SYSCALL_DEFINE0(vfork) #ifdef __ARCH_WANT_SYS_CLONE #ifdef CONFIG_CLONE_BACKWARDS SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, - int __user *, parent_tidptr, - unsigned long, tls, - int __user *, child_tidptr) + int __user *, parent_tidptr, unsigned long, tls, int __user *, + child_tidptr) #elif defined(CONFIG_CLONE_BACKWARDS2) SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags, - int __user *, parent_tidptr, - int __user *, child_tidptr, - unsigned long, tls) -#elif defined(CONFIG_CLONE_BACKWARDS3) -SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp, - int, stack_size, - int __user *, parent_tidptr, - int __user *, child_tidptr, + int __user *, parent_tidptr, int __user *, child_tidptr, unsigned 
long, tls) +#elif defined(CONFIG_CLONE_BACKWARDS3) +SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp, int, + stack_size, int __user *, parent_tidptr, int __user *, + child_tidptr, unsigned long, tls) #else SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, - int __user *, parent_tidptr, - int __user *, child_tidptr, - unsigned long, tls) + int __user *, parent_tidptr, int __user *, child_tidptr, + unsigned long, tls) #endif { struct kernel_clone_args args = { - .flags = (lower_32_bits(clone_flags) & ~CSIGNAL), - .pidfd = parent_tidptr, - .child_tid = child_tidptr, - .parent_tid = parent_tidptr, - .exit_signal = (lower_32_bits(clone_flags) & CSIGNAL), - .stack = newsp, - .tls = tls, + .flags = (lower_32_bits(clone_flags) & ~CSIGNAL), + .pidfd = parent_tidptr, + .child_tid = child_tidptr, + .parent_tid = parent_tidptr, + .exit_signal = (lower_32_bits(clone_flags) & CSIGNAL), + .stack = newsp, + .tls = tls, }; return kernel_clone(&args); @@ -3178,21 +3256,21 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, return -EINVAL; *kargs = (struct kernel_clone_args){ - .flags = args.flags, - .pidfd = u64_to_user_ptr(args.pidfd), - .child_tid = u64_to_user_ptr(args.child_tid), - .parent_tid = u64_to_user_ptr(args.parent_tid), - .exit_signal = args.exit_signal, - .stack = args.stack, - .stack_size = args.stack_size, - .tls = args.tls, - .set_tid_size = args.set_tid_size, - .cgroup = args.cgroup, + .flags = args.flags, + .pidfd = u64_to_user_ptr(args.pidfd), + .child_tid = u64_to_user_ptr(args.child_tid), + .parent_tid = u64_to_user_ptr(args.parent_tid), + .exit_signal = args.exit_signal, + .stack = args.stack, + .stack_size = args.stack_size, + .tls = args.tls, + .set_tid_size = args.set_tid_size, + .cgroup = args.cgroup, }; if (args.set_tid && - copy_from_user(kset_tid, u64_to_user_ptr(args.set_tid), - (kargs->set_tid_size * sizeof(pid_t)))) + copy_from_user(kset_tid, u64_to_user_ptr(args.set_tid), + (kargs->set_tid_size * sizeof(pid_t)))) return -EFAULT; kargs->set_tid = kset_tid; @@ -3287,7 +3365,8 @@ SYSCALL_DEFINE2(clone3, struct clone_args __user *, uargs, size_t, size) } #endif -void walk_process_tree(struct task_struct *top, proc_visitor visitor, void *data) +void walk_process_tree(struct task_struct *top, proc_visitor visitor, + void *data) { struct task_struct *leader, *parent, *child; int res; @@ -3295,8 +3374,8 @@ void walk_process_tree(struct task_struct *top, proc_visitor visitor, void *data read_lock(&tasklist_lock); leader = top = top->group_leader; down: - for_each_thread(leader, parent) { - list_for_each_entry(child, &parent->children, sibling) { + for_each_thread (leader, parent) { + list_for_each_entry (child, &parent->children, sibling) { res = visitor(child, data); if (res) { if (res < 0) @@ -3304,8 +3383,7 @@ void walk_process_tree(struct task_struct *top, proc_visitor visitor, void *data leader = child; goto down; } -up: - ; + up:; } } @@ -3382,11 +3460,11 @@ void __init proc_caches_init(void) */ static int check_unshare_flags(unsigned long unshare_flags) { - if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| - CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| - CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET| - CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP| - CLONE_NEWTIME)) + if (unshare_flags & + ~(CLONE_THREAD | CLONE_FS | CLONE_NEWNS | CLONE_SIGHAND | CLONE_VM | + CLONE_FILES | CLONE_SYSVSEM | CLONE_NEWUTS | CLONE_NEWIPC | + CLONE_NEWNET | CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWCGROUP | + 
CLONE_NEWTIME)) return -EINVAL; /* * Not implemented, but pretend it works if there is nothing @@ -3497,7 +3575,7 @@ int ksys_unshare(unsigned long unshare_flags) * to a new ipc namespace, the semaphore arrays from the old * namespace are unreachable. */ - if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) + if (unshare_flags & (CLONE_NEWIPC | CLONE_SYSVSEM)) do_sysvsem = 1; err = unshare_fs(unshare_flags, &new_fs); if (err) @@ -3508,8 +3586,8 @@ int ksys_unshare(unsigned long unshare_flags) err = unshare_userns(unshare_flags, &new_cred); if (err) goto bad_unshare_cleanup_fd; - err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, - new_cred, new_fs); + err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_cred, + new_fs); if (err) goto bad_unshare_cleanup_cred; @@ -3606,8 +3684,8 @@ int unshare_files(void) return 0; } -int sysctl_max_threads(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) +int sysctl_max_threads(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) { struct ctl_table t; int ret; diff --git a/kernel/groups.c b/kernel/groups.c index 9b43da22647d..8045812e8a3c 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -11,6 +11,9 @@ #include <linux/user_namespace.h> #include <linux/vmalloc.h> #include <linux/uaccess.h> +#ifdef CONFIG_CREDP +#include <asm/iee-cred.h> +#endif struct group_info *groups_alloc(int gidsetsize) { @@ -119,7 +122,11 @@ void set_groups(struct cred *new, struct group_info *group_info) { put_group_info(new->group_info); get_group_info(group_info); + #ifdef CONFIG_CREDP + iee_set_cred_group_info(new,group_info); + #else new->group_info = group_info; + #endif } EXPORT_SYMBOL(set_groups); diff --git a/kernel/kthread.c b/kernel/kthread.c index 1eea53050bab..317eac6eb2f2 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -30,6 +30,10 @@ #include <linux/sched/isolation.h> #include <trace/events/sched.h> +#ifdef CONFIG_IEE +#include <linux/iee-func.h> +#endif + static DEFINE_SPINLOCK(kthread_create_lock); static LIST_HEAD(kthread_create_list); @@ -1429,6 +1433,10 @@ void kthread_use_mm(struct mm_struct *mm) tsk->active_mm = mm; tsk->mm = mm; membarrier_update_current_mm(mm); + #ifdef CONFIG_IEE + iee_set_token_mm(tsk, mm); + iee_set_token_pgd(tsk, mm->pgd); + #endif switch_mm_irqs_off(active_mm, mm, tsk); local_irq_enable(); task_unlock(tsk); @@ -1473,7 +1481,12 @@ void kthread_unuse_mm(struct mm_struct *mm) local_irq_disable(); tsk->mm = NULL; membarrier_update_current_mm(NULL); + #ifdef CONFIG_IEE + iee_set_token_mm(tsk, mm); + iee_set_token_pgd(tsk, NULL); + #endif mmgrab_lazy_tlb(mm); + /* active_mm is still 'mm' */ enter_lazy_tlb(mm, tsk); local_irq_enable(); diff --git a/kernel/smpboot.c b/kernel/smpboot.c index f47d8f375946..60c7d365c0e1 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -16,6 +16,10 @@ #include <linux/kthread.h> #include <linux/smpboot.h> +#ifdef CONFIG_IEE +#include <linux/iee-func.h> +#endif + #include "smpboot.h" #ifdef CONFIG_SMP @@ -57,6 +61,11 @@ static __always_inline void idle_init(unsigned int cpu) pr_err("SMP: fork_idle() failed for CPU %u\n", cpu); else per_cpu(idle_threads, cpu) = tsk; + #ifdef CONFIG_IEE + // Set the secondary __entry_task. 
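/*
 * Annotation: the idle_init() hunk that continues below publishes the
 * freshly forked idle task into this CPU's __entry_task page and then
 * seals that page read-only in the linear map -- an initialize-then-protect
 * sequence. A user-space model of the same publish-then-seal step, with
 * mprotect() standing in for iee_set_logical_mem_ro():
 */
#include <assert.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        size_t pagesz = (size_t)sysconf(_SC_PAGESIZE);
        void **slot = mmap(NULL, pagesz, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        int idle_task;  /* stand-in for the idle thread's task_struct */

        assert(slot != MAP_FAILED);
        *slot = &idle_task;                             /* publish */
        assert(mprotect(slot, pagesz, PROT_READ) == 0); /* seal    */
        /* from here on, only the IEE gate could legitimately rewrite it */
        return 0;
}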
+ *(struct task_struct **)SHIFT_PERCPU_PTR(__entry_task,__per_cpu_offset[cpu]) = tsk; + iee_set_logical_mem_ro((unsigned long)SHIFT_PERCPU_PTR(__entry_task,__per_cpu_offset[cpu])); + #endif } } diff --git a/kernel/sys.c b/kernel/sys.c index 44b575990333..fbc47f83af50 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -75,6 +75,10 @@ #include <asm/io.h> #include <asm/unistd.h> +#ifdef CONFIG_CREDP +#include <asm/iee-cred.h> +#endif + #include "uid16.h" #ifndef SET_UNALIGN_CTL @@ -395,7 +399,11 @@ long __sys_setregid(gid_t rgid, gid_t egid) if (gid_eq(old->gid, krgid) || gid_eq(old->egid, krgid) || ns_capable_setid(old->user_ns, CAP_SETGID)) + #ifdef CONFIG_CREDP + iee_set_cred_gid(new,krgid); + #else new->gid = krgid; + #endif else goto error; } @@ -404,15 +412,27 @@ long __sys_setregid(gid_t rgid, gid_t egid) gid_eq(old->egid, kegid) || gid_eq(old->sgid, kegid) || ns_capable_setid(old->user_ns, CAP_SETGID)) + #ifdef CONFIG_CREDP + iee_set_cred_egid(new,kegid); + #else new->egid = kegid; + #endif else goto error; } if (rgid != (gid_t) -1 || (egid != (gid_t) -1 && !gid_eq(kegid, old->gid))) + #ifdef CONFIG_CREDP + iee_set_cred_sgid(new,new->egid); + #else new->sgid = new->egid; + #endif + #ifdef CONFIG_CREDP + iee_set_cred_fsgid(new,new->egid); + #else new->fsgid = new->egid; + #endif retval = security_task_fix_setgid(new, old, LSM_SETID_RE); if (retval < 0) @@ -454,9 +474,25 @@ long __sys_setgid(gid_t gid) retval = -EPERM; if (ns_capable_setid(old->user_ns, CAP_SETGID)) + #ifdef CONFIG_CREDP + { + iee_set_cred_fsgid(new,kgid); + iee_set_cred_sgid(new,kgid); + iee_set_cred_egid(new,kgid); + iee_set_cred_gid(new,kgid); + } + #else new->gid = new->egid = new->sgid = new->fsgid = kgid; + #endif else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid)) + #ifdef CONFIG_CREDP + { + iee_set_cred_fsgid(new,kgid); + iee_set_cred_egid(new,kgid); + } + #else new->egid = new->fsgid = kgid; + #endif else goto error; @@ -488,7 +524,11 @@ static int set_user(struct cred *new) return -EAGAIN; free_uid(new->user); + #ifdef CONFIG_CREDP + iee_set_cred_user(new,new_user); + #else new->user = new_user; + #endif return 0; } @@ -549,7 +589,11 @@ long __sys_setreuid(uid_t ruid, uid_t euid) retval = -EPERM; if (ruid != (uid_t) -1) { + #ifdef CONFIG_CREDP + iee_set_cred_uid(new,kruid); + #else new->uid = kruid; + #endif if (!uid_eq(old->uid, kruid) && !uid_eq(old->euid, kruid) && !ns_capable_setid(old->user_ns, CAP_SETUID)) @@ -557,7 +601,11 @@ long __sys_setreuid(uid_t ruid, uid_t euid) } if (euid != (uid_t) -1) { + #ifdef CONFIG_CREDP + iee_set_cred_euid(new,keuid); + #else new->euid = keuid; + #endif if (!uid_eq(old->uid, keuid) && !uid_eq(old->euid, keuid) && !uid_eq(old->suid, keuid) && @@ -572,8 +620,16 @@ long __sys_setreuid(uid_t ruid, uid_t euid) } if (ruid != (uid_t) -1 || (euid != (uid_t) -1 && !uid_eq(keuid, old->uid))) + #ifdef CONFIG_CREDP + iee_set_cred_suid(new,new->euid); + #else new->suid = new->euid; + #endif + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new,new->euid); + #else new->fsuid = new->euid; + #endif retval = security_task_fix_setuid(new, old, LSM_SETID_RE); if (retval < 0) @@ -626,7 +682,12 @@ long __sys_setuid(uid_t uid) retval = -EPERM; if (ns_capable_setid(old->user_ns, CAP_SETUID)) { + #ifdef CONFIG_CREDP + iee_set_cred_uid(new,kuid); + iee_set_cred_suid(new,kuid); + #else new->suid = new->uid = kuid; + #endif if (!uid_eq(kuid, old->uid)) { retval = set_user(new); if (retval < 0) @@ -636,7 +697,12 @@ long __sys_setuid(uid_t uid) goto error; } + #ifdef CONFIG_CREDP + 
iee_set_cred_euid(new,kuid); + iee_set_cred_fsuid(new,kuid); + #else new->fsuid = new->euid = kuid; + #endif retval = security_task_fix_setuid(new, old, LSM_SETID_ID); if (retval < 0) @@ -710,7 +776,11 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) return -ENOMEM; if (ruid != (uid_t) -1) { + #ifdef CONFIG_CREDP + iee_set_cred_uid(new,kruid); + #else new->uid = kruid; + #endif if (!uid_eq(kruid, old->uid)) { retval = set_user(new); if (retval < 0) @@ -718,10 +788,22 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) } } if (euid != (uid_t) -1) + #ifdef CONFIG_CREDP + iee_set_cred_euid(new,keuid); + #else new->euid = keuid; + #endif if (suid != (uid_t) -1) + #ifdef CONFIG_CREDP + iee_set_cred_suid(new,ksuid); + #else new->suid = ksuid; + #endif + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new,new->euid); + #else new->fsuid = new->euid; + #endif retval = security_task_fix_setuid(new, old, LSM_SETID_RES); if (retval < 0) @@ -810,12 +892,29 @@ long __sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) return -ENOMEM; if (rgid != (gid_t) -1) + #ifdef CONFIG_CREDP + iee_set_cred_gid(new,krgid); + #else new->gid = krgid; + #endif if (egid != (gid_t) -1) + #ifdef CONFIG_CREDP + iee_set_cred_egid(new,kegid); + #else new->egid = kegid; + #endif if (sgid != (gid_t) -1) + #ifdef CONFIG_CREDP + iee_set_cred_sgid(new,ksgid); + #else new->sgid = ksgid; + #endif + + #ifdef CONFIG_CREDP + iee_set_cred_fsgid(new,new->egid); + #else new->fsgid = new->egid; + #endif retval = security_task_fix_setgid(new, old, LSM_SETID_RES); if (retval < 0) @@ -882,7 +981,11 @@ long __sys_setfsuid(uid_t uid) uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) || ns_capable_setid(old->user_ns, CAP_SETUID)) { if (!uid_eq(kuid, old->fsuid)) { + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new,kuid); + #else new->fsuid = kuid; + #endif if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) goto change_okay; } @@ -926,7 +1029,11 @@ long __sys_setfsgid(gid_t gid) gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) || ns_capable_setid(old->user_ns, CAP_SETGID)) { if (!gid_eq(kgid, old->fsgid)) { + #ifdef CONFIG_CREDP + iee_set_cred_fsgid(new,kgid); + #else new->fsgid = kgid; + #endif if (security_task_fix_setgid(new,old,LSM_SETID_FS) == 0) goto change_okay; } diff --git a/kernel/umh.c b/kernel/umh.c index 1b13c5d34624..32f5c88e10bf 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -32,6 +32,10 @@ #include <trace/events/module.h> +#ifdef CONFIG_CREDP +#include <asm/iee-cred.h> +#endif + static kernel_cap_t usermodehelper_bset = CAP_FULL_SET; static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET; static DEFINE_SPINLOCK(umh_sysctl_lock); @@ -91,9 +95,15 @@ static int call_usermodehelper_exec_async(void *data) goto out; spin_lock(&umh_sysctl_lock); + #ifdef CONFIG_CREDP + iee_set_cred_cap_bset(new,cap_intersect(usermodehelper_bset, new->cap_bset)); + iee_set_cred_cap_inheritable(new,cap_intersect(usermodehelper_inheritable, + new->cap_inheritable)); + #else new->cap_bset = cap_intersect(usermodehelper_bset, new->cap_bset); new->cap_inheritable = cap_intersect(usermodehelper_inheritable, new->cap_inheritable); + #endif spin_unlock(&umh_sysctl_lock); if (sub_info->init) { diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 1d8e47bed3f1..9f1921025539 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -22,6 +22,10 @@ #include <linux/bsearch.h> #include <linux/sort.h> +#ifdef CONFIG_CREDP +#include <asm/iee-cred.h> +#endif + static struct kmem_cache *user_ns_cachep __read_mostly; 
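/*
 * Annotation: throughout the kernel/sys.c hunks above, a chained store
 * such as "new->suid = new->uid = kuid;" unrolls into one gated setter
 * call per field, and a setter that copies one field into another
 * (iee_set_cred_sgid(new, new->egid)) must run after the field it reads
 * has been written. A user-space model of that unrolling and ordering:
 */
#include <assert.h>

typedef unsigned int toy_uid_t;
struct toy_cred { toy_uid_t uid, euid, suid, fsuid; };

/* stand-ins for the per-field write gates */
static void toy_set_uid(struct toy_cred *c, toy_uid_t v)   { c->uid = v; }
static void toy_set_euid(struct toy_cred *c, toy_uid_t v)  { c->euid = v; }
static void toy_set_suid(struct toy_cred *c, toy_uid_t v)  { c->suid = v; }
static void toy_set_fsuid(struct toy_cred *c, toy_uid_t v) { c->fsuid = v; }

int main(void)
{
        struct toy_cred new = { 1000, 1000, 1000, 1000 };
        toy_uid_t kuid = 0;

        /* was: new->suid = new->uid = kuid; */
        toy_set_uid(&new, kuid);
        toy_set_suid(&new, kuid);
        /* was: new->fsuid = new->euid = kuid; */
        toy_set_euid(&new, kuid);
        /* reads new.euid, so it must follow the euid store */
        toy_set_fsuid(&new, new.euid);

        assert(new.uid == 0 && new.euid == 0 &&
               new.suid == 0 && new.fsuid == 0);
        return 0;
}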
static DEFINE_MUTEX(userns_state_mutex); @@ -45,6 +49,19 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) /* Start with the same capabilities as init but useless for doing * anything as the capabilities are bound to the new user namespace. */ + #ifdef CONFIG_CREDP + iee_set_cred_securebits(cred,SECUREBITS_DEFAULT); + iee_set_cred_cap_inheritable(cred,CAP_EMPTY_SET); + iee_set_cred_cap_permitted(cred,CAP_FULL_SET); + iee_set_cred_cap_effective(cred,CAP_FULL_SET); + iee_set_cred_cap_ambient(cred,CAP_EMPTY_SET); + iee_set_cred_cap_bset(cred,CAP_FULL_SET); +#ifdef CONFIG_KEYS + key_put(cred->request_key_auth); + iee_set_cred_request_key_auth(cred,NULL); +#endif + iee_set_cred_user_ns(cred,user_ns); + #else cred->securebits = SECUREBITS_DEFAULT; cred->cap_inheritable = CAP_EMPTY_SET; cred->cap_permitted = CAP_FULL_SET; @@ -57,6 +74,7 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) #endif /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ cred->user_ns = user_ns; + #endif } static unsigned long enforced_nproc_rlimit(void) diff --git a/mm/Kconfig b/mm/Kconfig index 45d4139c959c..eb9d41768c15 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -530,6 +530,18 @@ config NUMA_KEEP_MEMINFO config MEMORY_ISOLATION bool +# Config for kernel module isolation +config KOI + depends on ARM64 + depends on ARM64_VA_BITS_48 + depends on ARM64_4K_PAGES + def_bool n + +# Configs for pgtable isolation +config PTP + depends on IEE + def_bool y + # IORESOURCE_SYSTEM_RAM regions in the kernel resource tree that are marked # IORESOURCE_EXCLUSIVE cannot be mapped to user space, for example, via # /dev/mem. diff --git a/mm/damon/ops-common.c b/mm/damon/ops-common.c index d25d99cb5f2b..2ea51f559d4e 100644 --- a/mm/damon/ops-common.c +++ b/mm/damon/ops-common.c @@ -44,6 +44,7 @@ void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr if (!folio) return; + if (ptep_clear_young_notify(vma, addr, pte)) folio_set_young(folio); diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 13f0d1192707..60dc95c5b286 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -452,7 +452,11 @@ static void __init pmd_huge_tests(struct pgtable_debug_args *args) * X86 defined pmd_set_huge() verifies that the given * PMD is not a populated non-leaf entry. */ + #ifdef CONFIG_PTP + set_pmd(args->pmdp, __pmd(0)); + #else WRITE_ONCE(*args->pmdp, __pmd(0)); + #endif WARN_ON(!pmd_set_huge(args->pmdp, __pfn_to_phys(args->fixed_pmd_pfn), args->page_prot)); WARN_ON(!pmd_clear_huge(args->pmdp)); pmd = READ_ONCE(*args->pmdp); @@ -472,7 +476,11 @@ static void __init pud_huge_tests(struct pgtable_debug_args *args) * X86 defined pud_set_huge() verifies that the given * PUD is not a populated non-leaf entry. 
*/ + #ifdef CONFIG_PTP + set_pud(args->pudp, __pud(0)); + #else WRITE_ONCE(*args->pudp, __pud(0)); + #endif WARN_ON(!pud_set_huge(args->pudp, __pfn_to_phys(args->fixed_pud_pfn), args->page_prot)); WARN_ON(!pud_clear_huge(args->pudp)); pud = READ_ONCE(*args->pudp); @@ -511,7 +519,11 @@ static void __init pud_clear_tests(struct pgtable_debug_args *args) pr_debug("Validating PUD clear\n"); pud = __pud(pud_val(pud) | RANDOM_ORVALUE); + #ifdef CONFIG_PTP + set_pud(args->pudp, pud); + #else WRITE_ONCE(*args->pudp, pud); + #endif pud_clear(args->pudp); pud = READ_ONCE(*args->pudp); WARN_ON(!pud_none(pud)); @@ -548,7 +560,11 @@ static void __init p4d_clear_tests(struct pgtable_debug_args *args) pr_debug("Validating P4D clear\n"); p4d = __p4d(p4d_val(p4d) | RANDOM_ORVALUE); + #ifdef CONFIG_PTP + set_p4d(args->p4dp, p4d); + #else WRITE_ONCE(*args->p4dp, p4d); + #endif p4d_clear(args->p4dp); p4d = READ_ONCE(*args->p4dp); WARN_ON(!p4d_none(p4d)); @@ -582,7 +598,11 @@ static void __init pgd_clear_tests(struct pgtable_debug_args *args) pr_debug("Validating PGD clear\n"); pgd = __pgd(pgd_val(pgd) | RANDOM_ORVALUE); + #ifdef CONFIG_PTP + set_pgd(args->pgdp, pgd); + #else WRITE_ONCE(*args->pgdp, pgd); + #endif pgd_clear(args->pgdp); pgd = READ_ONCE(*args->pgdp); WARN_ON(!pgd_none(pgd)); @@ -650,7 +670,11 @@ static void __init pmd_clear_tests(struct pgtable_debug_args *args) pr_debug("Validating PMD clear\n"); pmd = __pmd(pmd_val(pmd) | RANDOM_ORVALUE); + #ifdef CONFIG_PTP + set_pmd(args->pmdp, pmd); + #else WRITE_ONCE(*args->pmdp, pmd); + #endif pmd_clear(args->pmdp); pmd = READ_ONCE(*args->pmdp); WARN_ON(!pmd_none(pmd)); diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c index ce06b2884789..a039c7a50ec5 100644 --- a/mm/early_ioremap.c +++ b/mm/early_ioremap.c @@ -147,7 +147,11 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) if (after_paging_init) __late_set_fixmap(idx, phys_addr, prot); else + #ifdef CONFIG_PTP + __iee_set_fixmap_pre_init(idx, phys_addr, prot); + #else __early_set_fixmap(idx, phys_addr, prot); + #endif phys_addr += PAGE_SIZE; --idx; --nrpages; @@ -199,13 +203,66 @@ void __init early_iounmap(void __iomem *addr, unsigned long size) if (after_paging_init) __late_clear_fixmap(idx); else + #ifdef CONFIG_PTP + __iee_set_fixmap_pre_init(idx, 0, FIXMAP_PAGE_CLEAR); + #else __early_set_fixmap(idx, 0, FIXMAP_PAGE_CLEAR); + #endif --idx; --nrpages; } prev_map[slot] = NULL; } +#ifdef CONFIG_PTP +void __init early_iounmap_after_init(void __iomem *addr, unsigned long size) +{ + unsigned long virt_addr; + unsigned long offset; + unsigned int nrpages; + enum fixed_addresses idx; + int i, slot; + + slot = -1; + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { + if (prev_map[i] == addr) { + slot = i; + break; + } + } + + if (WARN(slot < 0, "early_iounmap(%p, %08lx) not found slot\n", + addr, size)) + return; + + if (WARN(prev_size[slot] != size, + "early_iounmap(%p, %08lx) [%d] size not consistent %08lx\n", + addr, size, slot, prev_size[slot])) + return; + + WARN(early_ioremap_debug, "early_iounmap(%p, %08lx) [%d]\n", + addr, size, slot); + + virt_addr = (unsigned long)addr; + if (WARN_ON(virt_addr < fix_to_virt(FIX_BTMAP_BEGIN))) + return; + + offset = offset_in_page(virt_addr); + nrpages = PAGE_ALIGN(offset + size) >> PAGE_SHIFT; + + idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; + while (nrpages > 0) { + if (after_paging_init) + __late_clear_fixmap(idx); + else + __early_set_fixmap(idx, 0, FIXMAP_PAGE_CLEAR); + --idx; + --nrpages; + } + prev_map[slot] = NULL; +} +#endif + /* 
Remap an IO device */ void __init __iomem * early_ioremap(resource_size_t phys_addr, unsigned long size) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 763bb25e4f99..80bb2c0abeda 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -39,6 +39,10 @@ #include <linux/memory-tiers.h> #include <linux/compat.h> +#ifdef CONFIG_PTP +#include <linux/iee-func.h> +#endif + #include <asm/tlb.h> #include <asm/pgalloc.h> #include "internal.h" @@ -2489,6 +2493,10 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, unsigned long addr; pte_t *pte; int i; + #ifdef CONFIG_PTP + pte_t *ptep; + unsigned long iee_addr; + #endif /* * Leave pmd empty until pte is filled note that it is fine to delay @@ -2501,7 +2509,14 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, old_pmd = pmdp_huge_clear_flush(vma, haddr, pmd); pgtable = pgtable_trans_huge_withdraw(mm, pmd); - pmd_populate(mm, &_pmd, pgtable); + #ifdef CONFIG_PTP + ptep = (pte_t *)page_address(pgtable); + iee_addr = __phys_to_iee(__pa(ptep)); + set_iee_page_valid(iee_addr); + iee_set_logical_mem_ro((unsigned long)ptep); + #endif + //pmd_populate(mm, &_pmd, pgtable); + _pmd = __pmd(__phys_to_pmd_val(page_to_phys(pgtable)) | PMD_TYPE_TABLE); pte = pte_offset_map(&_pmd, haddr); VM_BUG_ON(!pte); @@ -2534,6 +2549,10 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr; pte_t *pte; int i; + #ifdef CONFIG_PTP + pte_t *ptep; + unsigned long iee_addr; + #endif VM_BUG_ON(haddr & ~HPAGE_PMD_MASK); VM_BUG_ON_VMA(vma->vm_start > haddr, vma); @@ -2671,7 +2690,14 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, * This's critical for some architectures (Power). */ pgtable = pgtable_trans_huge_withdraw(mm, pmd); - pmd_populate(mm, &_pmd, pgtable); + #ifdef CONFIG_PTP + ptep = (pte_t *)page_to_virt(pgtable); + iee_addr = __phys_to_iee(__pa(ptep)); + set_iee_page_valid(iee_addr); + iee_set_logical_mem_ro((unsigned long)ptep); + #endif + //pmd_populate(mm, &_pmd, pgtable); + _pmd = __pmd(__phys_to_pmd_val(page_to_phys(pgtable)) | PMD_TYPE_TABLE); pte = pte_offset_map(&_pmd, haddr); VM_BUG_ON(!pte); diff --git a/mm/init-mm.c b/mm/init-mm.c index 24c809379274..07d060fca6f0 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -55,3 +55,20 @@ void setup_initial_init_mm(void *start_code, void *end_code, init_mm.end_data = (unsigned long)end_data; init_mm.brk = (unsigned long)brk; } + +#ifdef CONFIG_KOI +/* + * This is used to init ko_mm when creating pgtable for a ko to be isolated + * the ko_mm belongs to a specific ko, pgdp is allocated by koi_pgd_alloc + */ +void init_ko_mm(struct mm_struct *ko_mm, pgd_t *pgdp) { + ko_mm->mm_rb = RB_ROOT; + ko_mm->pgd = pgdp; + ko_mm->mm_users = (atomic_t)ATOMIC_INIT(2); + ko_mm->mm_count = (atomic_t)ATOMIC_INIT(1); + ko_mm->mmap_lock = (struct rw_semaphore)__RWSEM_INITIALIZER(ko_mm->mmap_lock); + ko_mm->page_table_lock = __SPIN_LOCK_UNLOCKED(ko_mm.page_table_lock); + ko_mm->arg_lock = __SPIN_LOCK_UNLOCKED(ko_mm->arg_lock); + ko_mm->mmlist = (struct list_head)LIST_HEAD_INIT(ko_mm->mmlist); +} +#endif diff --git a/mm/memory.c b/mm/memory.c index 4ef917a182f9..28da89a19e30 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -80,6 +80,10 @@ #include <linux/userswap.h> #include <linux/dynamic_pool.h> +#ifdef CONFIG_PTP +#include <linux/iee-func.h> +#endif + #include <trace/events/kmem.h> #include <asm/io.h> @@ -5872,6 +5876,11 @@ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) spin_lock(&mm->page_table_lock); if 
(!p4d_present(*p4d)) { + #ifdef CONFIG_PTP + unsigned long iee_addr = __phys_to_iee(__pa(new)); + set_iee_page_valid(iee_addr); + iee_set_logical_mem_ro((unsigned long)new); + #endif mm_inc_nr_puds(mm); smp_wmb(); /* See comment in pmd_install() */ p4d_populate(mm, p4d, new); @@ -5896,6 +5905,11 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) ptl = pud_lock(mm, pud); if (!pud_present(*pud)) { + #ifdef CONFIG_PTP + unsigned long iee_addr = __phys_to_iee(__pa(new)); + set_iee_page_valid(iee_addr); + iee_set_logical_mem_ro((unsigned long)new); + #endif mm_inc_nr_pmds(mm); smp_wmb(); /* See comment in pmd_install() */ pud_populate(mm, pud, new); diff --git a/mm/slub.c b/mm/slub.c index ee3e32cdb7fd..20a45a7feed5 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -42,6 +42,11 @@ #include <kunit/test-bug.h> #include <linux/sort.h> +#ifdef CONFIG_IEE +#include <linux/iee-func.h> +#include <asm/iee-access.h> +#endif + #include <linux/debugfs.h> #include <trace/events/kmem.h> @@ -317,6 +322,7 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) /* * Tracking user of a slab. */ +#ifndef CONFIG_IEE #define TRACK_ADDRS_COUNT 16 struct track { unsigned long addr; /* Called from address */ @@ -329,6 +335,7 @@ struct track { }; enum track_item { TRACK_ALLOC, TRACK_FREE }; +#endif #ifdef SLAB_SUPPORTS_SYSFS static int sysfs_slab_add(struct kmem_cache *); @@ -379,7 +386,9 @@ static struct workqueue_struct *flushwq; * freeptr_t represents a SLUB freelist pointer, which might be encoded * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled. */ +#ifndef CONFIG_IEE typedef struct { unsigned long v; } freeptr_t; +#endif /* * Returns freelist pointer (ptr). With hardening, this is obfuscated @@ -464,7 +473,14 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) #endif freeptr_addr = (unsigned long)kasan_reset_tag((void *)freeptr_addr); + #ifdef CONFIG_IEE + if(IS_ENABLED(CONFIG_CREDP) && strcmp(s->name, "cred_jar") == 0) + iee_set_freeptr((freeptr_t *)freeptr_addr, freelist_ptr_encode(s, fp, freeptr_addr)); + else + *(freeptr_t *)freeptr_addr = freelist_ptr_encode(s, fp, freeptr_addr); + #else *(freeptr_t *)freeptr_addr = freelist_ptr_encode(s, fp, freeptr_addr); + #endif } /* Loop over all objects in a slab */ @@ -809,7 +825,34 @@ static void set_track_update(struct kmem_cache *s, void *object, depot_stack_handle_t handle) { struct track *p = get_track(s, object, alloc); +#ifdef CONFIG_IEE + struct track tmp; +#endif +#ifdef CONFIG_IEE + if(IS_ENABLED(CONFIG_CREDP) && strcmp(s->name, "cred_jar") == 0) + { + tmp = *p; + #ifdef CONFIG_STACKDEPOT + tmp.handle = handle; + #endif + tmp.addr = addr; + tmp.cpu = smp_processor_id(); + tmp.pid = current->pid; + tmp.when = jiffies; + iee_set_track(p,&tmp); + } + else + { + #ifdef CONFIG_STACKDEPOT + p->handle = handle; + #endif + p->addr = addr; + p->cpu = smp_processor_id(); + p->pid = current->pid; + p->when = jiffies; + } +#else #ifdef CONFIG_STACKDEPOT p->handle = handle; #endif @@ -817,6 +860,7 @@ static void set_track_update(struct kmem_cache *s, void *object, p->cpu = smp_processor_id(); p->pid = current->pid; p->when = jiffies; +#endif } static __always_inline void set_track(struct kmem_cache *s, void *object, @@ -835,7 +879,14 @@ static void init_tracking(struct kmem_cache *s, void *object) return; p = get_track(s, object, TRACK_ALLOC); + #ifdef CONFIG_IEE + if(IS_ENABLED(CONFIG_CREDP) && strcmp(s->name, "cred_jar") == 0) + iee_memset(p, 0, 2*sizeof(struct track)); + else + 
memset(p, 0, 2*sizeof(struct track)); + #else memset(p, 0, 2*sizeof(struct track)); + #endif } static void print_track(const char *s, struct track *t, unsigned long pr_time) @@ -1045,7 +1096,14 @@ static void init_object(struct kmem_cache *s, void *object, u8 val) unsigned int poison_size = s->object_size; if (s->flags & SLAB_RED_ZONE) { + #ifdef CONFIG_IEE + if(IS_ENABLED(CONFIG_CREDP) && strcmp(s->name, "cred_jar") == 0) + iee_memset(p - s->red_left_pad, val, s->red_left_pad); + else + memset(p - s->red_left_pad, val, s->red_left_pad); + #else memset(p - s->red_left_pad, val, s->red_left_pad); + #endif if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) { /* @@ -1058,12 +1116,34 @@ static void init_object(struct kmem_cache *s, void *object, u8 val) } if (s->flags & __OBJECT_POISON) { + #ifdef CONFIG_IEE + if(IS_ENABLED(CONFIG_CREDP) && strcmp(s->name, "cred_jar") == 0) + { + iee_memset(p, POISON_FREE, poison_size - 1); + iee_memset(&p[poison_size - 1], POISON_END, 1); + } + else + { + memset(p, POISON_FREE, poison_size - 1); + p[poison_size - 1] = POISON_END; + } + #else memset(p, POISON_FREE, poison_size - 1); p[poison_size - 1] = POISON_END; + #endif } - if (s->flags & SLAB_RED_ZONE) + if (s->flags & SLAB_RED_ZONE) { + #ifdef CONFIG_IEE + if(IS_ENABLED(CONFIG_CREDP) && strcmp(s->name, "cred_jar") == 0) + iee_memset(p + poison_size, val, s->inuse - poison_size); + else + memset(p + poison_size, val, s->inuse - poison_size); + #else memset(p + poison_size, val, s->inuse - poison_size); + #endif + + } } static void restore_bytes(struct kmem_cache *s, char *message, u8 data, @@ -1433,7 +1513,14 @@ void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) return; metadata_access_enable(); + #ifdef CONFIG_IEE + if(IS_ENABLED(CONFIG_CREDP) && strcmp(s->name, "cred_jar") == 0) + iee_memset(kasan_reset_tag(addr), POISON_INUSE, slab_size(slab)); + else + memset(kasan_reset_tag(addr), POISON_INUSE, slab_size(slab)); + #else memset(kasan_reset_tag(addr), POISON_INUSE, slab_size(slab)); + #endif metadata_access_disable(); } @@ -2015,6 +2102,9 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) void *start, *p, *next; int idx; bool shuffle; + #ifdef CONFIG_IEE + unsigned int order; + #endif flags &= gfp_allowed_mask; @@ -2029,6 +2119,9 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_RECLAIM; slab = alloc_slab_page(alloc_gfp, node, oo); + #ifdef CONFIG_IEE + order = oo_order(oo); + #endif if (unlikely(!slab)) { oo = s->min; alloc_gfp = flags; @@ -2037,6 +2130,9 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) * Try a lower order alloc if possible */ slab = alloc_slab_page(alloc_gfp, node, oo); + #ifdef CONFIG_IEE + order = oo_order(oo); + #endif if (unlikely(!slab)) return NULL; stat(s, ORDER_FALLBACK); @@ -2046,6 +2142,45 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) slab->inuse = 0; slab->frozen = 0; + #ifdef CONFIG_IEE + if(IS_ENABLED(CONFIG_CREDP) && strcmp(s->name, "cred_jar") == 0) + { + int i; + for(i = 0; i < (0x1 << order); i++) + { + unsigned long iee_addr = __phys_to_iee(page_to_phys(folio_page(slab_folio(slab), i))); + set_iee_page_valid(iee_addr); + iee_set_logical_mem_ro((unsigned long)page_address(folio_page(slab_folio(slab), i))); + } + } + + // If the page belongs to a task_struct, alloc token for it and set iee&lm va. 
+ if(strcmp(s->name, "task_struct") == 0) + { + int i; + for(i = 0; i < (0x1 << order); i++) + { + void *token_addr = (void *)__phys_to_iee(page_to_phys(folio_page(slab_folio(slab), i))); + // Get lm va of the page. + void *alloc_token = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + iee_set_token_page_valid(token_addr, alloc_token); + set_iee_page_valid(__phys_to_iee(__pa(alloc_token))); + iee_set_logical_mem_ro((unsigned long)alloc_token); + } + } + #else + #ifdef CONFIG_KOI + if (strcmp(s->name, "task_struct") == 0) { + int i; + for (i = 0; i < (0x1 << order); i++) { + void *token_addr = __phys_to_virt(page_to_phys(page + i)) + koi_offset; + void *alloc_token = __get_free_page(GFP_KERNEL | __GFP_ZERO); + koi_add_page_mapping(token_addr, alloc_token); + } + } + #endif + #endif + account_slab(slab, oo_order(oo), s, flags); slab->slab_cache = s; @@ -2098,6 +2233,67 @@ static void __free_slab(struct kmem_cache *s, struct slab *slab) __folio_clear_slab(folio); mm_account_reclaimed_pages(pages); unaccount_slab(slab, order, s); + + #ifdef CONFIG_IEE + if(IS_ENABLED(CONFIG_CREDP) && strcmp(s->name, "cred_jar") == 0) + { + int i; + for(i = 0; i < (0x1 << order); i++) + { + unsigned long iee_addr = __phys_to_iee(page_to_phys(folio_page(folio, i))); + set_iee_page_invalid(iee_addr); + iee_set_logical_mem_rw((unsigned long)page_address(folio_page(folio, i))); + } + } + // If the page containing this token is empty, free it and restore iee&lm va. + if(strcmp(s->name, "task_struct") == 0) + { + int i; + for(i = 0; i < (0x1 << order); i++) + { + void *token_addr = (void *)__phys_to_iee(page_to_phys(folio_page(folio, i))); + unsigned long flags; + unsigned long res; + local_irq_save(flags); + asm volatile("at s1e1r, %0"::"r"(token_addr)); + isb(); + res = read_sysreg(par_el1); + local_irq_restore(flags); + if(!(res & 0x1)) + { + // Get lm va of the page. 
+ void *token_page = __va(res & PTE_ADDR_MASK); + iee_set_token_page_invalid(token_addr); + set_iee_page_invalid(__phys_to_iee(__pa(token_page))); + iee_set_logical_mem_rw((unsigned long)token_page); + free_page((unsigned long)token_page); + } + } + } + #else + #ifdef CONFIG_KOI + if(strcmp(s->name, "task_struct") == 0) + { + int i; + for(i = 0; i < (0x1 << order); i++) + { + void *token_addr = __phys_to_virt(page_to_phys(page + i)) + koi_offset; + unsigned long flags; + local_irq_save(flags); + asm volatile("at s1e1r, %0"::"r"(token_addr)); + isb(); + unsigned long res = read_sysreg(par_el1); + local_irq_restore(flags); + if(!(res & 0x1)) + { + koi_remove_page_mapping(token_addr); + free_page(__va(res & PTE_ADDR_MASK)); + } + } + } + #endif + #endif + __free_pages(&folio->page, order); } diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index a2cbe44c48e1..7cf05d293312 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -28,6 +28,10 @@ #include <linux/vmalloc.h> #include <linux/sched.h> +#ifdef CONFIG_PTP +#include <linux/iee-func.h> +#endif + #include <asm/dma.h> #include <asm/pgalloc.h> @@ -146,6 +150,9 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node, struct page *reuse) { pte_t *pte = pte_offset_kernel(pmd, addr); + #ifdef CONFIG_PTP + unsigned long iee_addr; + #endif if (pte_none(ptep_get(pte))) { pte_t entry; void *p; @@ -167,6 +174,11 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node, get_page(reuse); p = page_to_virt(reuse); } +#ifdef CONFIG_PTP + iee_addr = __phys_to_iee(__pa(p)); + set_iee_page_valid(iee_addr); + iee_set_logical_mem_ro((unsigned long)p); +#endif entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); set_pte_at(&init_mm, addr, pte, entry); } @@ -176,11 +188,20 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node, static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node) { void *p = vmemmap_alloc_block(size, node); + #ifdef CONFIG_PTP + unsigned long iee_addr; + #endif if (!p) return NULL; memset(p, 0, size); + #ifdef CONFIG_PTP + iee_addr = __phys_to_iee(__pa(p)); + set_iee_page_valid(iee_addr); + iee_set_logical_mem_ro((unsigned long)p); + #endif + return p; } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index e6058942a084..27a006728009 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3431,7 +3431,7 @@ static int vmap_pfn_apply(pte_t *pte, unsigned long addr, void *private) if (WARN_ON_ONCE(pfn_valid(pfn))) return -EINVAL; - + ptent = pte_mkspecial(pfn_pte(pfn, data->prot)); set_pte_at(&init_mm, addr, pte, ptent); diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index c42ddd85ff1f..4714b4f2be08 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -34,6 +34,10 @@ #include <keys/user-type.h> #include "internal.h" +#ifdef CONFIG_CREDP +#include <asm/iee-cred.h> +#endif + MODULE_DESCRIPTION("DNS Resolver"); MODULE_AUTHOR("Wang Lei"); MODULE_LICENSE("GPL"); @@ -365,8 +369,13 @@ static int __init init_dns_resolver(void) /* instruct request_key() to use this special keyring as a cache for * the results it looks up */ set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); + #ifdef CONFIG_CREDP + iee_set_cred_thread_keyring(cred,keyring); + iee_set_cred_jit_keyring(cred,KEY_REQKEY_DEFL_THREAD_KEYRING); + #else cred->thread_keyring = keyring; cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; + #endif dns_resolver_cache = cred; kdebug("DNS resolver keyring: %d\n", key_serial(keyring)); diff --git 
a/security/commoncap.c b/security/commoncap.c index bc0521104197..d7d3b7cc13e8 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -26,6 +26,10 @@ #include <linux/personality.h> #include <linux/mnt_idmapping.h> +#ifdef CONFIG_CREDP +#include <asm/iee-cred.h> +#endif + /* * If a non-root user executes a setuid-root binary in * !secure(SECURE_NOROOT) mode, then we raise capabilities. @@ -266,6 +270,15 @@ int cap_capset(struct cred *new, if (!cap_issubset(*effective, *permitted)) return -EPERM; + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new,*effective); + iee_set_cred_cap_inheritable(new,*inheritable); + iee_set_cred_cap_permitted(new,*permitted); + + iee_set_cred_cap_ambient(new,cap_intersect(new->cap_ambient, + cap_intersect(*permitted, + *inheritable))); + #else new->cap_effective = *effective; new->cap_inheritable = *inheritable; new->cap_permitted = *permitted; @@ -277,6 +290,7 @@ int cap_capset(struct cred *new, new->cap_ambient = cap_intersect(new->cap_ambient, cap_intersect(*permitted, *inheritable)); + #endif if (WARN_ON(!cap_ambient_invariant_ok(new))) return -EINVAL; return 0; @@ -601,9 +615,16 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps, * pP' = (X & fP) | (pI & fI) * The addition of pA' is handled later. */ +#ifdef CONFIG_CREDP + kernel_cap_t temp = new->cap_permitted; + temp.val = (new->cap_bset.val & caps->permitted.val) | + (new->cap_inheritable.val & caps->inheritable.val); + iee_set_cred_cap_permitted(new,temp); +#else new->cap_permitted.val = (new->cap_bset.val & caps->permitted.val) | (new->cap_inheritable.val & caps->inheritable.val); +#endif if (caps->permitted.val & ~new->cap_permitted.val) /* insufficient to execute correctly */ @@ -726,7 +747,15 @@ static int get_file_caps(struct linux_binprm *bprm, struct file *file, int rc = 0; struct cpu_vfs_cap_data vcaps; + #ifdef CONFIG_CREDP + do { + kernel_cap_t tmp_cap = bprm->cred->cap_permitted; + tmp_cap.val = 0; + iee_set_cred_cap_permitted(bprm->cred, tmp_cap); + } while (0); + #else cap_clear(bprm->cred->cap_permitted); + #endif if (!file_caps_enabled) return 0; @@ -757,7 +786,15 @@ static int get_file_caps(struct linux_binprm *bprm, struct file *file, out: if (rc) + #ifdef CONFIG_CREDP + do { + kernel_cap_t tmp_cap = bprm->cred->cap_permitted; + tmp_cap.val = 0; + iee_set_cred_cap_permitted(bprm->cred, tmp_cap); + } while (0); + #else cap_clear(bprm->cred->cap_permitted); + #endif return rc; } @@ -809,8 +846,13 @@ static void handle_privileged_root(struct linux_binprm *bprm, bool has_fcap, */ if (__is_eff(root_uid, new) || __is_real(root_uid, new)) { /* pP' = (cap_bset & ~0) | (pI & ~0) */ + #ifdef CONFIG_CREDP + iee_set_cred_cap_permitted(new,cap_combine(old->cap_bset, + old->cap_inheritable)); + #else new->cap_permitted = cap_combine(old->cap_bset, old->cap_inheritable); + #endif } /* * If only the real uid is 0, we do not set the effective bit. 
@@ -919,34 +961,71 @@ int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file) /* downgrade; they get no more than they had, and maybe less */ if (!ns_capable(new->user_ns, CAP_SETUID) || (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)) { + #ifdef CONFIG_CREDP + iee_set_cred_euid(new,new->uid); + iee_set_cred_egid(new,new->gid); + #else new->euid = new->uid; new->egid = new->gid; + #endif } + #ifdef CONFIG_CREDP + iee_set_cred_cap_permitted(new,cap_intersect(new->cap_permitted, + old->cap_permitted)); + #else new->cap_permitted = cap_intersect(new->cap_permitted, old->cap_permitted); + #endif } + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new,new->euid); + iee_set_cred_suid(new,new->euid); + iee_set_cred_fsgid(new,new->egid); + iee_set_cred_sgid(new,new->egid); + #else new->suid = new->fsuid = new->euid; new->sgid = new->fsgid = new->egid; + #endif /* File caps or setid cancels ambient. */ if (has_fcap || is_setid) + #ifdef CONFIG_CREDP + do { + kernel_cap_t tmp_cap = new->cap_ambient; + tmp_cap.val = 0; + iee_set_cred_cap_ambient(new, tmp_cap); + } while (0); + #else cap_clear(new->cap_ambient); + #endif /* * Now that we've computed pA', update pP' to give: * pP' = (X & fP) | (pI & fI) | pA' */ + #ifdef CONFIG_CREDP + iee_set_cred_cap_permitted(new,cap_combine(new->cap_permitted, new->cap_ambient)); + #else new->cap_permitted = cap_combine(new->cap_permitted, new->cap_ambient); + #endif /* * Set pE' = (fE ? pP' : pA'). Because pA' is zero if fE is set, * this is the same as pE' = (fE ? pP' : 0) | pA'. */ if (effective) + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new,new->cap_permitted); + #else new->cap_effective = new->cap_permitted; + #endif else + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new,new->cap_ambient); + #else new->cap_effective = new->cap_ambient; + #endif if (WARN_ON(!cap_ambient_invariant_ok(new))) return -EPERM; @@ -957,7 +1036,11 @@ int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file) return ret; } + #ifdef CONFIG_CREDP + iee_set_cred_securebits(new,new->securebits & ~issecure_mask(SECURE_KEEP_CAPS)); + #else new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); + #endif if (WARN_ON(!cap_ambient_invariant_ok(new))) return -EPERM; @@ -1092,8 +1175,21 @@ static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old) !uid_eq(new->euid, root_uid) && !uid_eq(new->suid, root_uid))) { if (!issecure(SECURE_KEEP_CAPS)) { + #ifdef CONFIG_CREDP + do { + kernel_cap_t tmp_cap = new->cap_permitted; + tmp_cap.val = 0; + iee_set_cred_cap_permitted(new, tmp_cap); + } while (0); + do { + kernel_cap_t tmp_cap = new->cap_effective; + tmp_cap.val = 0; + iee_set_cred_cap_effective(new, tmp_cap); + } while (0); + #else cap_clear(new->cap_permitted); cap_clear(new->cap_effective); + #endif } /* @@ -1101,12 +1197,32 @@ static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old) * by exec to drop capabilities. We should make sure that * this remains the case. 
*/ + #ifdef CONFIG_CREDP + do { + kernel_cap_t tmp_cap = new->cap_ambient; + tmp_cap.val = 0; + iee_set_cred_cap_ambient(new, tmp_cap); + } while (0); + #else cap_clear(new->cap_ambient); + #endif } if (uid_eq(old->euid, root_uid) && !uid_eq(new->euid, root_uid)) + #ifdef CONFIG_CREDP + do { + kernel_cap_t tmp_cap = new->cap_effective; + tmp_cap.val = 0; + iee_set_cred_cap_effective(new, tmp_cap); + } while (0); + #else cap_clear(new->cap_effective); + #endif if (!uid_eq(old->euid, root_uid) && uid_eq(new->euid, root_uid)) + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new,new->cap_permitted); + #else new->cap_effective = new->cap_permitted; + #endif } /** @@ -1142,13 +1258,22 @@ int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags) if (!issecure(SECURE_NO_SETUID_FIXUP)) { kuid_t root_uid = make_kuid(old->user_ns, 0); if (uid_eq(old->fsuid, root_uid) && !uid_eq(new->fsuid, root_uid)) + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new,cap_drop_fs_set(new->cap_effective)); + #else new->cap_effective = cap_drop_fs_set(new->cap_effective); + #endif if (!uid_eq(old->fsuid, root_uid) && uid_eq(new->fsuid, root_uid)) + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new,cap_raise_fs_set(new->cap_effective, + new->cap_permitted)); + #else new->cap_effective = cap_raise_fs_set(new->cap_effective, new->cap_permitted); + #endif } break; @@ -1243,7 +1368,15 @@ static int cap_prctl_drop(unsigned long cap) new = prepare_creds(); if (!new) return -ENOMEM; + #ifdef CONFIG_CREDP + { + kernel_cap_t tmp = new->cap_bset; + cap_lower(tmp, cap); + iee_set_cred_cap_bset(new, tmp); + } + #else cap_lower(new->cap_bset, cap); + #endif return commit_creds(new); } @@ -1319,7 +1452,11 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, new = prepare_creds(); if (!new) return -ENOMEM; + #ifdef CONFIG_CREDP + iee_set_cred_securebits(new,arg2); + #else new->securebits = arg2; + #endif return commit_creds(new); case PR_GET_SECUREBITS: @@ -1338,9 +1475,17 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, if (!new) return -ENOMEM; if (arg2) + #ifdef CONFIG_CREDP + iee_set_cred_securebits(new,new->securebits | issecure_mask(SECURE_KEEP_CAPS)); + #else new->securebits |= issecure_mask(SECURE_KEEP_CAPS); + #endif else + #ifdef CONFIG_CREDP + iee_set_cred_securebits(new,new->securebits & ~issecure_mask(SECURE_KEEP_CAPS)); + #else new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); + #endif return commit_creds(new); case PR_CAP_AMBIENT: @@ -1351,7 +1496,15 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, new = prepare_creds(); if (!new) return -ENOMEM; + #ifdef CONFIG_CREDP + do { + kernel_cap_t tmp_cap = new->cap_ambient; + tmp_cap.val = 0; + iee_set_cred_cap_ambient(new, tmp_cap); + } while (0); + #else cap_clear(new->cap_ambient); + #endif return commit_creds(new); } @@ -1375,9 +1528,25 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, if (!new) return -ENOMEM; if (arg2 == PR_CAP_AMBIENT_RAISE) + #ifdef CONFIG_CREDP + { + kernel_cap_t tmp = new->cap_ambient; + cap_raise(tmp, arg3); + iee_set_cred_cap_ambient(new, tmp); + } + #else cap_raise(new->cap_ambient, arg3); + #endif else + #ifdef CONFIG_CREDP + { + kernel_cap_t tmp = new->cap_ambient; + cap_lower(tmp, arg3); + iee_set_cred_cap_ambient(new, tmp); + } + #else cap_lower(new->cap_ambient, arg3); + #endif return commit_creds(new); } diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 19be69fa4d05..6cb164dfc19b 100644 --- 
a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -23,6 +23,9 @@ #include <linux/uaccess.h> #include <keys/request_key_auth-type.h> #include "internal.h" +#ifdef CONFIG_CREDP +#include <asm/iee-cred.h> +#endif #define KEY_MAX_DESC_SIZE 4096 @@ -1155,7 +1158,11 @@ static int keyctl_change_reqkey_auth(struct key *key) return -ENOMEM; key_put(new->request_key_auth); + #ifdef CONFIG_CREDP + iee_set_cred_request_key_auth(new,key_get(key)); + #else new->request_key_auth = key_get(key); + #endif return commit_creds(new); } @@ -1432,7 +1439,11 @@ long keyctl_set_reqkey_keyring(int reqkey_defl) } set: + #ifdef CONFIG_CREDP + iee_set_cred_jit_keyring(new,reqkey_defl); + #else new->jit_keyring = reqkey_defl; + #endif commit_creds(new); return old_setting; error: @@ -1644,9 +1655,17 @@ long keyctl_session_to_parent(void) cred = cred_alloc_blank(); if (!cred) goto error_keyring; + #ifdef CONFIG_CREDP + newwork = (struct rcu_head *)(cred->rcu.func); + #else newwork = &cred->rcu; + #endif + #ifdef CONFIG_CREDP + iee_set_cred_session_keyring(cred,key_ref_to_ptr(keyring_r)); + #else cred->session_keyring = key_ref_to_ptr(keyring_r); + #endif keyring_r = NULL; init_task_work(newwork, key_change_session_keyring); @@ -1705,7 +1724,11 @@ long keyctl_session_to_parent(void) write_unlock_irq(&tasklist_lock); rcu_read_unlock(); if (oldwork) + #ifdef CONFIG_CREDP + put_cred(*(struct cred **)(oldwork + 1)); + #else put_cred(container_of(oldwork, struct cred, rcu)); + #endif if (newwork) put_cred(cred); return ret; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index b5d5333ab330..aaa2a2347d84 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -19,6 +19,10 @@ #include <keys/request_key_auth-type.h> #include "internal.h" +#ifdef CONFIG_CREDP +#include <asm/iee-cred.h> +#endif + /* Session keyring create vs join semaphore */ static DEFINE_MUTEX(key_session_mutex); @@ -232,7 +236,11 @@ int install_thread_keyring_to_cred(struct cred *new) if (IS_ERR(keyring)) return PTR_ERR(keyring); + #ifdef CONFIG_CREDP + iee_set_cred_thread_keyring(new,keyring); + #else new->thread_keyring = keyring; + #endif return 0; } @@ -279,7 +287,11 @@ int install_process_keyring_to_cred(struct cred *new) if (IS_ERR(keyring)) return PTR_ERR(keyring); + #ifdef CONFIG_CREDP + iee_set_cred_process_keyring(new,keyring); + #else new->process_keyring = keyring; + #endif return 0; } @@ -338,7 +350,11 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) /* install the keyring */ old = cred->session_keyring; + #ifdef CONFIG_CREDP + iee_set_cred_session_keyring(cred,keyring); + #else cred->session_keyring = keyring; + #endif if (old) key_put(old); @@ -911,7 +927,11 @@ long join_session_keyring(const char *name) void key_change_session_keyring(struct callback_head *twork) { const struct cred *old = current_cred(); + #ifdef CONFIG_CREDP + struct cred *new = *(struct cred **)(twork + 1); + #else struct cred *new = container_of(twork, struct cred, rcu); + #endif if (unlikely(current->flags & PF_EXITING)) { put_cred(new); @@ -925,6 +945,38 @@ void key_change_session_keyring(struct callback_head *twork) return; } + /* If get_ucounts fails more bits are needed in the refcount */ + if (unlikely(!get_ucounts(old->ucounts))) { + WARN_ONCE(1, "In %s get_ucounts failed\n", __func__); + put_cred(new); + return; + } + + #ifdef CONFIG_CREDP + iee_set_cred_uid(new,old-> uid); + iee_set_cred_euid(new,old-> euid); + iee_set_cred_suid(new,old-> suid); + 
iee_set_cred_fsuid(new,old->fsuid); + iee_set_cred_gid(new,old-> gid); + iee_set_cred_egid(new,old-> egid); + iee_set_cred_sgid(new,old-> sgid); + iee_set_cred_fsgid(new,old->fsgid); + iee_set_cred_user(new,get_uid(old->user)); + iee_set_cred_ucounts(new, old->ucounts); + iee_set_cred_user_ns(new,get_user_ns(old->user_ns)); + iee_set_cred_group_info(new,get_group_info(old->group_info)); + + iee_set_cred_securebits(new,old->securebits); + iee_set_cred_cap_inheritable(new,old->cap_inheritable); + iee_set_cred_cap_permitted(new,old->cap_permitted); + iee_set_cred_cap_effective(new,old->cap_effective); + iee_set_cred_cap_ambient(new,old->cap_ambient); + iee_set_cred_cap_bset(new,old->cap_bset); + + iee_set_cred_jit_keyring(new,old->jit_keyring); + iee_set_cred_thread_keyring(new,key_get(old->thread_keyring)); + iee_set_cred_process_keyring(new,key_get(old->process_keyring)); + #else new-> uid = old-> uid; new-> euid = old-> euid; new-> suid = old-> suid; @@ -948,6 +1000,7 @@ void key_change_session_keyring(struct callback_head *twork) new->jit_keyring = old->jit_keyring; new->thread_keyring = key_get(old->thread_keyring); new->process_keyring = key_get(old->process_keyring); + #endif security_transfer_creds(new, old); diff --git a/security/security.c b/security/security.c index 407b51719f79..74ffd7ea3f37 100644 --- a/security/security.c +++ b/security/security.c @@ -30,6 +30,9 @@ #include <linux/string.h> #include <linux/msg.h> #include <net/flow.h> +#ifdef CONFIG_CREDP +#include <asm/iee-cred.h> +#endif /* How many LSMs were built into the kernel? */ #define LSM_COUNT (__end_lsm_info - __start_lsm_info) @@ -570,11 +573,19 @@ EXPORT_SYMBOL(unregister_blocking_lsm_notifier); static int lsm_cred_alloc(struct cred *cred, gfp_t gfp) { if (blob_sizes.lbs_cred == 0) { + #ifdef CONFIG_CREDP + iee_set_cred_security(cred,NULL); + #else cred->security = NULL; + #endif return 0; } + #ifdef CONFIG_CREDP + iee_set_cred_security(cred,kzalloc(blob_sizes.lbs_cred, gfp)); + #else cred->security = kzalloc(blob_sizes.lbs_cred, gfp); + #endif if (cred->security == NULL) return -ENOMEM; return 0; @@ -2950,7 +2961,11 @@ void security_cred_free(struct cred *cred) call_void_hook(cred_free, cred); kfree(cred->security); + #ifdef CONFIG_CREDP + iee_set_cred_security(cred,NULL); + #else cred->security = NULL; + #endif } /** -- 2.34.1
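Two idioms recur throughout the hunks above. Under CONFIG_CREDP, every direct store to a struct cred field is replaced by an iee_set_cred_*() accessor, so the isolated execution environment (IEE) can keep cred objects write-protected in the kernel's ordinary mappings; for bitmap fields such as the capability sets, the new value is first built in a local kernel_cap_t and then handed to the accessor whole. A minimal sketch of the idiom, assuming only the accessors this patch declares in asm/iee-cred.h; the wrapper name and caller are illustrative, not part of the patch:

	#include <linux/cred.h>
	#ifdef CONFIG_CREDP
	#include <asm/iee-cred.h>
	#endif

	/* Illustrative wrapper: route a securebits update through the IEE
	 * gate when CONFIG_CREDP is on, fall back to a plain store otherwise. */
	static void example_set_securebits(struct cred *new, unsigned int bits)
	{
	#ifdef CONFIG_CREDP
		iee_set_cred_securebits(new, bits);	/* write performed inside the IEE */
	#else
		new->securebits = bits;			/* ordinary direct store */
	#endif
	}

The do { kernel_cap_t tmp = ...; cap_lower(tmp, cap); iee_set_cred_cap_bset(new, tmp); } while (0) blocks in security/commoncap.c follow the same shape: read the field, modify the copy, publish through the gate.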
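The second idiom, under CONFIG_PTP, protects page-table pages themselves: the hunks in mm/memory.c, mm/huge_memory.c and mm/sparse-vmemmap.c register each page about to be installed as a page table at its IEE alias and revoke write permission on its linear-map alias, so later updates must go through set_pte()/set_pmd()/set_pud() rather than raw WRITE_ONCE() stores (hence the debug_vm_pgtable.c substitutions). A condensed sketch of the allocation-side sequence, using the helpers the patch adds in linux/iee-func.h; the function name protect_new_pgtable() is hypothetical:

	#ifdef CONFIG_PTP
	#include <linux/mm.h>
	#include <linux/iee-func.h>

	/* Hypothetical condensation of the sequence in __pud_alloc() and
	 * __pmd_alloc() above. */
	static void protect_new_pgtable(void *table)
	{
		/* Make the page writable at its IEE alias ... */
		unsigned long iee_addr = __phys_to_iee(__pa(table));

		set_iee_page_valid(iee_addr);
		/* ... and read-only at its ordinary linear-map alias. */
		iee_set_logical_mem_ro((unsigned long)table);
	}
	#endif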
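The mm/slub.c hunks extend the same protection to slab metadata of the cred cache: once cred_jar pages are read-only in the linear map, every in-object write the allocator performs (freelist pointer, debug tracking, poisoning) is diverted to an iee_* counterpart. A sketch of the dispatch that init_object(), init_tracking() and setup_slab_debug() repeat inline; the wrapper name cred_jar_memset() is hypothetical:

	#include <linux/slab.h>
	#include <linux/string.h>
	#ifdef CONFIG_IEE
	#include <asm/iee-access.h>
	#endif

	/* Illustrative only: memset through the IEE gate when the object
	 * lives in the write-protected cred_jar cache. */
	static void cred_jar_memset(struct kmem_cache *s, void *p, int c, size_t n)
	{
	#ifdef CONFIG_IEE
		if (IS_ENABLED(CONFIG_CREDP) && strcmp(s->name, "cred_jar") == 0) {
			iee_memset(p, c, n);	/* gated write into a read-only page */
			return;
		}
	#endif
		memset(p, c, n);		/* all other caches: plain write */
	}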
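Finally, the __free_slab() hunk decides whether a task_struct token page is still mapped by issuing an AT S1E1R address-translation instruction and inspecting PAR_EL1, whose bit 0 is the fault flag (clear means the stage-1 translation succeeded and the register carries the physical address). The sketch below isolates that probe under a hypothetical name, iee_token_to_phys(); the inline assembly and the PTE_ADDR_MASK extraction mirror the patch:

	#include <linux/types.h>
	#include <linux/irqflags.h>
	#include <asm/barrier.h>
	#include <asm/sysreg.h>
	#include <asm/pgtable-hwdef.h>

	/* Hypothetical helper: return the physical address backing 'va',
	 * or 0 if stage-1 EL1 read translation faults. */
	static phys_addr_t iee_token_to_phys(const void *va)
	{
		unsigned long flags, par;

		local_irq_save(flags);			/* keep PAR_EL1 from being clobbered */
		asm volatile("at s1e1r, %0" :: "r"(va));
		isb();
		par = read_sysreg(par_el1);
		local_irq_restore(flags);

		return (par & 0x1) ? 0 : (par & PTE_ADDR_MASK);
	}

Interrupts are masked around the probe because PAR_EL1 is a single per-CPU register that a concurrent translation in an interrupt handler would overwrite.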