hipe segmentation fault

Mikael Pettersson <>
Thu Apr 6 21:44:07 CEST 2006


Serge Aleynikov writes:
 > Mikael,
 > 
 > Here's a bunch of info you requested.  Let's continue this discussion 
 > outside of the mailing list's scope, and just post the resolution 
 > when/if it's available.
 > 
[gdb session omitted]

Problem solved. According to the gdb session everything looked OK,
yet the code couldn't be executed. It turns out that with the fairly
new processor model Serge is using, the Linux kernel makes normal
data memory non-executable by default, so native code placed in
ordinary malloc() memory faults as soon as it is called. We saw this
long ago on AMD64 machines with 64-bit kernels, but had never seen it
on machines with 32-bit kernels. The solution is to adapt the
specially written AMD64 code memory allocation routines for use on
x86 as well.

The patch below solves this problem. It's been checked in on the R11
development branch and should be applied to R10B-10 as well.

/Mikael

--- otp-0330/erts/emulator/hipe/hipe_amd64.h.~1~	2005-12-15 12:29:52.000000000 +0100
+++ otp-0330/erts/emulator/hipe/hipe_amd64.h	2006-04-06 17:46:19.000000000 +0200
@@ -14,10 +14,6 @@
 
 #define hipe_arch_name	am_amd64
 
-/* for hipe_bifs_enter_code_2 */
-extern void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p);
-#define HIPE_ALLOC_CODE(n,c,t,p) hipe_alloc_code((n),(c),(t),(p))
-
 extern const Uint sse2_fnegate_mask[];
 
 #endif /* HIPE_AMD64_H */
--- otp-0330/erts/emulator/hipe/hipe_x86.c.~1~	2005-12-15 12:29:53.000000000 +0100
+++ otp-0330/erts/emulator/hipe/hipe_x86.c	2006-04-06 19:23:59.000000000 +0200
@@ -50,6 +50,114 @@
     return 0;
 }
 
+/*
+ * Memory allocator for executable code.
+ *
+ * This is required on x86 because some combinations
+ * of Linux kernels and CPU generations default to
+ * non-executable memory mappings, causing ordinary
+ * malloc() memory to be non-executable.
+ */
+static unsigned int code_bytes;
+static char *code_next;
+
+#if 0	/* change to non-zero to get allocation statistics at exit() */
+static unsigned int total_mapped, nr_joins, nr_splits, total_alloc, nr_allocs, nr_large, total_lost;
+static unsigned int atexit_done;
+
+static void alloc_code_stats(void)
+{
+    printf("\r\nalloc_code_stats: %u bytes mapped, %u joins, %u splits, %u bytes allocated, %u average alloc, %u large allocs, %u bytes lost\r\n",
+	   total_mapped, nr_joins, nr_splits, total_alloc, nr_allocs ? total_alloc/nr_allocs : 0, nr_large, total_lost);
+}
+
+static void atexit_alloc_code_stats(void)
+{
+    if (!atexit_done) {
+	atexit_done = 1;
+	(void)atexit(alloc_code_stats);
+    }
+}
+
+#define ALLOC_CODE_STATS(X)	do{X;}while(0)
+#else
+#define ALLOC_CODE_STATS(X)	do{}while(0)
+#endif
+
+static void morecore(unsigned int alloc_bytes)
+{
+    unsigned int map_bytes;
+    char *map_hint, *map_start;
+
+    /* Page-align the amount to allocate. */
+    map_bytes = (alloc_bytes + 4095) & ~4095;
+
+    /* Round up small allocations. */
+    if (map_bytes < 1024*1024)
+	map_bytes = 1024*1024;
+    else
+	ALLOC_CODE_STATS(++nr_large);
+
+    /* Create a new memory mapping, ensuring it is executable
+       and in the low 2GB of the address space. Also attempt
+       to make it adjacent to the previous mapping. */
+    map_hint = code_next + code_bytes;
+    if ((unsigned long)map_hint & 4095)
+	abort();
+    map_start = mmap(map_hint, map_bytes,
+		     PROT_EXEC|PROT_READ|PROT_WRITE,
+		     MAP_PRIVATE|MAP_ANONYMOUS
+#ifdef __x86_64__
+		     |MAP_32BIT
+#endif
+		     ,
+		     -1, 0);
+    if (map_start == MAP_FAILED) {
+	perror("mmap");
+	abort();
+    }
+    ALLOC_CODE_STATS(total_mapped += map_bytes);
+
+    /* Merge adjacent mappings, so the trailing portion of the previous
+       mapping isn't lost. In practice this is quite successful. */
+    if (map_start == map_hint) {
+	ALLOC_CODE_STATS(++nr_joins);
+	code_bytes += map_bytes;
+    } else {
+	ALLOC_CODE_STATS(++nr_splits);
+	ALLOC_CODE_STATS(total_lost += code_bytes);
+	code_next = map_start;
+	code_bytes = map_bytes;
+    }
+
+    ALLOC_CODE_STATS(atexit_alloc_code_stats());
+}
+
+static void *alloc_code(unsigned int alloc_bytes)
+{
+    void *res;
+
+    /* Align function entries. */
+    alloc_bytes = (alloc_bytes + 3) & ~3;
+
+    if (code_bytes < alloc_bytes)
+	morecore(alloc_bytes);
+    ALLOC_CODE_STATS(++nr_allocs);
+    ALLOC_CODE_STATS(total_alloc += alloc_bytes);
+    res = code_next;
+    code_next += alloc_bytes;
+    code_bytes -= alloc_bytes;
+    return res;
+}
+
+void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p)
+{
+    if (is_not_nil(callees))
+	return NULL;
+    *trampolines = NIL;
+    return alloc_code(nrbytes);
+}
+
 /* called from hipe_bif0.c:hipe_bifs_make_native_stub_2()
    and hipe_bif0.c:hipe_make_stub() */
 void *hipe_make_native_stub(void *beamAddress, unsigned int beamArity)
@@ -76,7 +184,7 @@
 	16 +	/* 16 when both offsets are 8-bit */
 	(P_BEAM_IP >= 128 ? 3 : 0) +
 	(P_ARITY >= 128 ? 3 : 0);
-    codep = code = erts_alloc(ERTS_ALC_T_HIPE, codeSize);
+    codep = code = alloc_code(codeSize);
 
     /* movl $beamAddress, P_BEAM_IP(%ebp); 3 or 6 bytes, plus 4 */
     codep[0] = 0xc7;
--- otp-0330/erts/emulator/hipe/hipe_x86.h.~1~	2005-12-15 12:29:53.000000000 +0100
+++ otp-0330/erts/emulator/hipe/hipe_x86.h	2006-04-06 17:45:48.000000000 +0200
@@ -39,4 +39,8 @@
 extern void nbif_inc_stack_0(void);
 extern void nbif_handle_fp_exception(void);
 
+/* for hipe_bifs_enter_code_2 */
+extern void *hipe_alloc_code(Uint nrbytes, Eterm callees, Eterm *trampolines, Process *p);
+#define HIPE_ALLOC_CODE(n,c,t,p) hipe_alloc_code((n),(c),(t),(p))
+
 #endif /* HIPE_X86_H */



More information about the erlang-questions mailing list