On Thu, Mar 15, 2018 at 09:38:56PM -0500, Peter Lei wrote:
> Some recent UEFI implementations have begun to leave the CPU with page
> write protection enabled in CR0.
> 
> With r330539 which enables kernel page protections, interesting things
> happen during boot (aka panic) when protection is already enabled,
> including a write protection fault from an explicit .text fixup write
> from xsave->xsaveopt by fpuinit().
> 
> I see this so far booting -CURRENT under virtual environments:
> 
> - QEMU with recent OVMF EDK2 builds: this is certainly due to UEFI
> enabling paging and page protections.
> 
> - VMWare Fusion 10.1.x on Mac: no specific insight on what's going on
> inside the implementation, but CR0_WP is definitely left enabled before
> the kernel is booted.
> 
> I have patched my kernel build to explicitly clear CR0_WP (e.g. in
> initializecpu) prior to creating the page tables to get around this, but
> someone might have a cleaner/better solution...

Try this.

diff --git a/sys/amd64/amd64/db_interface.c b/sys/amd64/amd64/db_interface.c
index 9dfd44cf82c..1ecec02835c 100644
--- a/sys/amd64/amd64/db_interface.c
+++ b/sys/amd64/amd64/db_interface.c
@@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/pcpu.h>
 
 #include <machine/cpufunc.h>
+#include <machine/md_var.h>
 #include <machine/specialreg.h>
 
 #include <ddb/ddb.h>
@@ -75,19 +76,19 @@ db_write_bytes(vm_offset_t addr, size_t size, char *data)
        jmp_buf jb;
        void *prev_jb;
        char *dst;
-       u_long cr0save;
+       bool old_wp;
        int ret;
 
-       cr0save = rcr0();
+       old_wp = false;
        prev_jb = kdb_jmpbuf(jb);
        ret = setjmp(jb);
        if (ret == 0) {
-               load_cr0(cr0save & ~CR0_WP);
+               old_wp = disable_wp();
                dst = (char *)addr;
                while (size-- > 0)
                        *dst++ = *data++;
        }
-       load_cr0(cr0save);
+       restore_wp(old_wp);
        (void)kdb_jmpbuf(prev_jb);
        return (ret);
 }
diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c
index 72b10396341..39367fa6ffb 100644
--- a/sys/amd64/amd64/fpu.c
+++ b/sys/amd64/amd64/fpu.c
@@ -205,6 +205,7 @@ fpuinit_bsp1(void)
 {
        u_int cp[4];
        uint64_t xsave_mask_user;
+       bool old_wp;
 
        if ((cpu_feature2 & CPUID2_XSAVE) != 0) {
                use_xsave = 1;
@@ -233,8 +234,14 @@ fpuinit_bsp1(void)
                 * Patch the XSAVE instruction in the cpu_switch code
                 * to XSAVEOPT.  We assume that XSAVE encoding used
                 * REX byte, and set the bit 4 of the r/m byte.
+                *
+                * It seems that some BIOSes give control to the OS
+                * with CR0.WP already set, making the kernel text
+                * read-only before cpu_startup().
                 */
+               old_wp = disable_wp();
                ctx_switch_xsave[3] |= 0x10;
+               restore_wp(old_wp);
        }
 }
 
diff --git a/sys/amd64/amd64/gdb_machdep.c b/sys/amd64/amd64/gdb_machdep.c
index 68eb6002593..f7ca3c07ea3 100644
--- a/sys/amd64/amd64/gdb_machdep.c
+++ b/sys/amd64/amd64/gdb_machdep.c
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/cpufunc.h>
 #include <machine/frame.h>
 #include <machine/gdb_machdep.h>
+#include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/psl.h>
 #include <machine/reg.h>
@@ -127,17 +128,14 @@ gdb_cpu_signal(int type, int code)
 void *
 gdb_begin_write(void)
 {
-       u_long cr0save;
 
-       cr0save = rcr0();
-       load_cr0(cr0save & ~CR0_WP);
-       return ((void *)cr0save);
+       return (disable_wp() ? &gdb_begin_write : NULL);
 }
 
 void
 gdb_end_write(void *arg)
 {
 
-       load_cr0((u_long)arg);
+       restore_wp(arg != NULL);
 }
 
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index e340c6cd14d..fcc45eca57d 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -2597,6 +2597,31 @@ clear_pcb_flags(struct pcb *pcb, const u_int flags)
            : "cc", "memory");
 }
 
+/*
+ * Temporarily allow writes to read-only kernel text: disable_wp() clears
+ * CR0.WP and returns true if it was set; restore_wp(true) sets it again.
+ * The pair must run on the same CPU with no rescheduling in between.
+ */
+bool
+disable_wp(void)
+{
+       u_long cr0;
+
+       cr0 = rcr0();
+       if ((cr0 & CR0_WP) == 0)
+               return (false);
+       load_cr0(cr0 & ~CR0_WP);
+       return (true);
+}
+
+void
+restore_wp(bool old_wp)
+{
+       /* Re-read CR0 so that only WP is changed; no-op if old_wp is false. */
+       if (old_wp)
+               load_cr0(rcr0() | CR0_WP);
+}
+
 #ifdef KDB
 
 /*
diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h
index 63dabaf4047..abcc273b6c6 100644
--- a/sys/amd64/include/md_var.h
+++ b/sys/amd64/include/md_var.h
@@ -53,6 +53,8 @@ void  amd64_conf_fast_syscall(void);
 void   amd64_db_resume_dbreg(void);
 void   amd64_lower_shared_page(struct sysentvec *);
 void   amd64_syscall(struct thread *td, int traced);
+bool   disable_wp(void);
+void   restore_wp(bool old_wp);
 void   doreti_iret(void) __asm(__STRING(doreti_iret));
 void   doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault));
 void   ld_ds(void) __asm(__STRING(ld_ds));
_______________________________________________
freebsd-current@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-current
To unsubscribe, send any mail to "freebsd-current-unsubscr...@freebsd.org"

Reply via email to