Restricted WX Mappings

This page describes a simple hardening feature that I am proposing for Linux/Glibc. It intends to improve security by allowing a task to voluntarily drop its ability to create mappings that are both writable and executable, an ability that is rarely needed but widely abused.

This feature is policy-free, i.e., it is a piece of functionality offered by the kernel whose mode of usage is completely up to userland. No sysfs or other controls exist that control its behavior; it is up to the task itself to enable it (or not).

The kernel side

The changes on the kernel side are fairly simple and unobtrusive. Code is here.

Control from user space

A simple prctl() option is added that sets a per-task bit indicating that restricted WX mappings are in effect. The bit can only be enabled, not disabled.

Restricting writable and executable permissions

In order to really mitigate the risk of mappings that are both writable and executable, simply prohibiting mappings that have both write and execute permissions set is not sufficient. Mappings may have their permissions changed throughout their lifetime, i.e., a mapping can go from rw- to r-- to r-x, and changes made while the mapping was writable may become executable later.

Instead, the chosen approach is to manipulate the VM_MAYWRITE and VM_MAYEXEC bits. These bits are maintained by the kernel for each mapping, and reflect whether the respective VM_WRITE and VM_EXEC permissions can be granted at any time during the lifetime of the mapping.

When restricted WX mappings are in effect, the following logic is applied to the permission bits upon invocation of mmap() or mprotect():

  • if rwx permissions are requested, fail the call;

  • if either VM_MAYWRITE or VM_MAYEXEC is already cleared (e.g., if the mapping originates from a noexec mounted volume), do nothing;

  • if r-x permissions are requested, clear VM_MAYWRITE;

  • else clear VM_MAYEXEC.

As a result, a writable mapping can never become executable at any point during its lifetime.

PROT_FINAL

A PROT_FINAL flag is added for use by mmap() and mprotect() [see next section].

Changes to the dynamic loader

In case of dynamic binaries, many mappings will already have been created by the dynamic loader before the program's main() is called. In order to allow for the same level of restriction on those mappings, a new PROT flag PROT_FINAL is introduced that can be used by the dynamic loader (or anywhere else) to indicate that a mapping should be restricted, i.e., that VM_MAYWRITE or VM_MAYEXEC flags (or even VM_MAYREAD flags in case of PROT_NONE mappings) should be cleared.

Patch against eglibc is below.

The result is an address space that is completely locked down: none of the existing writable mappings can be made executable, and no new mappings can be created that can be abused in such a way.

diff --git a/libc/elf/dl-load.c b/libc/elf/dl-load.c
index e2f4a54cfe81..cebcfbb59fd4 100644
--- a/libc/elf/dl-load.c
+++ b/libc/elf/dl-load.c
@@ -39,6 +39,10 @@
 
 #include <dl-dst.h>
 
+#ifndef PROT_FINAL
+#define PROT_FINAL     0x20
+#endif
+
 /* On some systems, no flag bits are given to specify file mapping.  */
 #ifndef MAP_FILE
 # define MAP_FILE      0
@@ -1285,7 +1289,7 @@ cannot allocate TLS data structures for initial thread");
 
        /* Remember which part of the address space this object uses.  */
        l->l_map_start = (ElfW(Addr)) __mmap ((void *) mappref, maplength,
-                                             c->prot,
+                                             c->prot|PROT_WRITE|PROT_FINAL,
                                              MAP_COPY|MAP_FILE,
                                              fd, c->mapoff);
        if (__builtin_expect ((void *) l->l_map_start == MAP_FAILED, 0))
@@ -1306,7 +1310,7 @@ cannot allocate TLS data structures for initial thread");
             mapping.  */
          __mprotect ((caddr_t) (l->l_addr + c->mapend),
                      loadcmds[nloadcmds - 1].mapstart - c->mapend,
-                     PROT_NONE);
+                     PROT_NONE|PROT_FINAL);
 
        l->l_contiguous = 1;
 
@@ -1336,7 +1340,7 @@ cannot allocate TLS data structures for initial thread");
        if (c->mapend > c->mapstart
            /* Map the segment contents from the file.  */
            && (__mmap ((void *) (l->l_addr + c->mapstart),
-                       c->mapend - c->mapstart, c->prot,
+                       c->mapend - c->mapstart, c->prot|PROT_WRITE|PROT_FINAL,
                        MAP_FIXED|MAP_COPY|MAP_FILE,
                        fd, c->mapoff)
                == MAP_FAILED))
@@ -1371,32 +1375,16 @@ cannot allocate TLS data structures for initial thread");
              zeropage = zeroend;
 
            if (zeropage > zero)
-             {
                /* Zero the final part of the last page of the segment.  */
-               if (__builtin_expect ((c->prot & PROT_WRITE) == 0, 0))
-                 {
-                   /* Dag nab it.  */
-                   if (__mprotect ((caddr_t) (zero
-                                              & ~(GLRO(dl_pagesize) - 1)),
-                                   GLRO(dl_pagesize), c->prot|PROT_WRITE) < 0)
-                     {
-                       errstring = N_("cannot change memory protections");
-                       goto call_lose_errno;
-                     }
-                 }
                memset ((void *) zero, '\0', zeropage - zero);
-               if (__builtin_expect ((c->prot & PROT_WRITE) == 0, 0))
-                 __mprotect ((caddr_t) (zero & ~(GLRO(dl_pagesize) - 1)),
-                             GLRO(dl_pagesize), c->prot);
-             }
 
            if (zeroend > zeropage)
              {
                /* Map the remaining zero pages in from the zero fill FD.  */
                caddr_t mapat;
                mapat = __mmap ((caddr_t) zeropage, zeroend - zeropage,
-                               c->prot, MAP_ANON|MAP_PRIVATE|MAP_FIXED,
-                               -1, 0);
+                               c->prot|PROT_FINAL,
+                               MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
                if (__builtin_expect (mapat == MAP_FAILED, 0))
                  {
                    errstring = N_("cannot map zero-fill pages");
@@ -1405,6 +1393,11 @@ cannot allocate TLS data structures for initial thread");
              }
          }
 
+       /* Remove write permissions and finalize */
+       if (!(c->prot & PROT_WRITE))
+         __mprotect ((caddr_t) (l->l_addr + c->mapstart),
+                       c->mapend - c->mapstart, c->prot|PROT_FINAL);
+
        ++c;
       }
   }
diff --git a/libc/elf/dl-reloc.c b/libc/elf/dl-reloc.c
index a0d3662b1d30..13a3a8ecad65 100644
--- a/libc/elf/dl-reloc.c
+++ b/libc/elf/dl-reloc.c
@@ -27,6 +27,10 @@
 #include <_itoa.h>
 #include "dynamic-link.h"
 
+#ifndef PROT_FINAL
+#define PROT_FINAL     0x20
+#endif
+
 /* Statistics function.  */
 #ifdef SHARED
 # define bump_num_cache_relocations() ++GL(dl_num_cache_relocations)
@@ -326,7 +330,7 @@ _dl_protect_relro (struct link_map *l)
                    & ~(GLRO(dl_pagesize) - 1));
 
   if (start != end
-      && __mprotect ((void *) start, end - start, PROT_READ) < 0)
+      && __mprotect ((void *) start, end - start, PROT_READ|PROT_FINAL) < 0)
     {
       static const char errstring[] = N_("\
 cannot apply additional memory protection after relocation");

ardbiesheuvel/RestrictedWxMappings (last modified 2014-01-22 16:45:24)