[erlang-bugs] Simple floating-point lists operation hungs Erlang system

Erik Søe Sørensen ess@REDACTED
Sat Nov 26 03:13:41 CET 2011


I can reproduce this on Linux (Intel(R) Core(TM)2 Duo CPU, 2.10GHz, 2GB).
This was done using 4 processes and 750k elements; apparent deadlock happened within a couple of minutes (but see below).

The OS threads of the Erlang VM are stuck doing this:
Thread 1:
#1  0x00c19971 in select () at ../sysdeps/unix/syscall-template.S:82
#2  0x08184554 in erts_sys_main_thread () at sys/unix/sys.c:3049
#3  0x08095be3 in erl_start (argc=14, argv=0xbfeb3f54) at beam/erl_init.c:1441
#4  0x080785eb in main (argc=14, argv=0xbfeb3f54) at sys/unix/erl_main.c:29

Thread 2:
#1  0x00ef2015 in pthread_cond_wait@@GLIBC_2.3.2 ()
    at ../nptl/sysdeps/unix/sysv/linux/i386/i686/../i486/pthread_cond_wait.S:122
#2  0x081e0ee7 in ethr_cond_wait (cnd=0x826bacc, mtx=0x826bab4)
    at common/ethr_mutex.c:1350
#3  0x080c51cc in erts_cnd_wait (old_activity=ERTS_ACTIVITY_GC, 
    new_activity=ERTS_ACTIVITY_UNDEFINED, locked=1, prepare=0, resume=0, 
    arg=0x0) at beam/erl_threads.h:697
#4  erts_smp_cnd_wait (old_activity=ERTS_ACTIVITY_GC, 
    new_activity=ERTS_ACTIVITY_UNDEFINED, locked=1, prepare=0, resume=0, 
    arg=0x0) at beam/erl_smp.h:472
#5  block_me (old_activity=ERTS_ACTIVITY_GC, 
    new_activity=ERTS_ACTIVITY_UNDEFINED, locked=1, prepare=0, resume=0, 
    arg=0x0) at beam/utils.c:3439
#6  erts_check_block (old_activity=ERTS_ACTIVITY_GC, 
    new_activity=ERTS_ACTIVITY_UNDEFINED, locked=1, prepare=0, resume=0, 
    arg=0x0) at beam/utils.c:3646
#7  0x0813a72c in erts_smp_set_activity (p=0xb543ef04, need=3, 
    objv=0xb7688ec0, nobj=11) at beam/sys.h:940
#8  erts_garbage_collect (p=0xb543ef04, need=3, objv=0xb7688ec0, nobj=11)
    at beam/erl_gc.c:395
#9  0x080f63d5 in erts_gc_mixed_times (p=0xb543ef04, reg=0xb7688ec0, live=11)
    at beam/erl_arith.c:1690
#10 0x0816a431 in process_main () at beam/beam_emu.c:2678

Thread 3:
#1  0x00ef4edb in write () at ../sysdeps/unix/syscall-template.S:82
#2  0x081e5d27 in write_fd (vfdp=0xb6570dd0, buf=0x81f2c41 "\n", len=1)
    at common/erl_printf.c:140
#3  0x081e566c in erts_printf_format (fn=0x81e5cf0 <write_fd>, arg=0xb6570dd0, 
    fmt=0x81f2c3b "F%X:%s\n", ap=0xb6570dfc "\033")
    at common/erl_printf_format.c:858
#4  0x081e5a72 in erts_vfdprintf (fd=3, format=0x81f2c3b "F%X:%s\n", 
    arglist=0xb6570dfc "\033") at common/erl_printf.c:376
#5  0x080bf2b0 in erts_print (to=1, arg=0x0, 
    format=0x1 <Address 0x1 out of bounds>) at beam/utils.c:313
#6  0x0814a289 in heap_dump (to=3, to_arg=0x0, x=2828803689)
    at beam/erl_process_dump.c:331
#7  0x0814a8d2 in dump_process_info (to=3, to_arg=0x0)
    at beam/erl_process_dump.c:125
#8  erts_deep_process_dump (to=3, to_arg=0x0) at beam/erl_process_dump.c:73
#9  0x08134f97 in erl_crash_dump_v (file=0xb547566c "\245\323\026\bXVG\265", 
    line=0, 
    fmt=0x81e8698 "%s: Cannot %s %lu bytes of memory (of type \"%s\").\n", 
    args=0xb6571f48 "*\206\036\b\301]\037\b|P\321\021\035\204\036\b")
    at beam/break.c:711
#10 0x080949fa in erl_exit (n=1, 
    fmt=0x81e8698 "%s: Cannot %s %lu bytes of memory (of type \"%s\").\n")
#11 0x0807ae5f in erts_alc_fatal_error (error=1, func=0, n=131)
    at beam/erl_alloc.c:1642
#12 0x0807af67 in erts_alloc_n_enomem (n=131, size=298930300)
    at beam/erl_alloc.c:1667
#13 0x081376b8 in erts_alloc (p=<value optimised out>, 
    new_sz=<value optimised out>, objv=<value optimised out>, nobj=11)
    at beam/erl_alloc.h:205
#14 do_minor (p=<value optimised out>, new_sz=<value optimised out>, 
    objv=<value optimised out>, nobj=11) at beam/erl_gc.c:897
#15 0x0813a29d in minor_collection (p=0xb543ed54, need=3, objv=0xb767fe40, 
    nobj=11) at beam/erl_gc.c:769
#16 erts_garbage_collect (p=0xb543ed54, need=3, objv=0xb767fe40, nobj=11)
    at beam/erl_gc.c:376
#17 0x080f6e1a in erts_gc_mixed_plus (p=0xb543ed54, reg=0xb767fe40, live=11)
    at beam/erl_arith.c:1308
#18 0x0816a431 in process_main () at beam/beam_emu.c:2678

Thread 4:
#1  0x00ef2015 in pthread_cond_wait@@GLIBC_2.3.2 ()
    at ../nptl/sysdeps/unix/sysv/linux/i386/i686/../i486/pthread_cond_wait.S:122
#2  0x081e0ee7 in ethr_cond_wait (cnd=0x82706c0, mtx=0x82706a4)
    at common/ethr_mutex.c:1350
#3  0x08185d31 in erts_cnd_wait (unused=0x0) at beam/erl_threads.h:697
#4  child_waiter (unused=0x0) at sys/unix/sys.c:2835
#5  0x081e2fb5 in thr_wrapper (vtwd=0xbfeb39f0) at pthread/ethread.c:106
#6  0x00eed96e in start_thread (arg=0xb6577b70) at pthread_create.c:300
#7  0x00c20a4e in clone () at ../sysdeps/unix/sysv/linux/i386/clone.S:130

Thread 5:
#1  0x00ef2015 in pthread_cond_wait@@GLIBC_2.3.2 ()
    at ../nptl/sysdeps/unix/sysv/linux/i386/i686/../i486/pthread_cond_wait.S:122
#2  0x081e0ee7 in ethr_cond_wait (cnd=0x8247dc0, mtx=0x8247d74)
    at common/ethr_mutex.c:1350
#3  0x080b902e in erts_cnd_wait (unused=0x0) at beam/erl_threads.h:697
#4  erts_smp_cnd_wait (unused=0x0) at beam/erl_smp.h:472
#5  sys_msg_dispatcher_func (unused=0x0) at beam/erl_trace.c:3238
#6  0x081e2fb5 in thr_wrapper (vtwd=0xbfeb3a70) at pthread/ethread.c:106
#7  0x00eed96e in start_thread (arg=0xb6d78b70) at pthread_create.c:300
#8  0x00c20a4e in clone () at ../sysdeps/unix/sysv/linux/i386/clone.S:130

Thread 6:
#1  0x00c19971 in select () at ../sysdeps/unix/syscall-template.S:82
#2  0x081e69d0 in erts_milli_sleep (ms=60000) at common/erl_misc_utils.c:159
#3  0x080bfd89 in emergency_watchdog (unused=0x0) at beam/utils.c:3959
#4  0x081e2fb5 in thr_wrapper (vtwd=0xbfeb3a70) at pthread/ethread.c:106
#5  0x00eed96e in start_thread (arg=0xb6d81b70) at pthread_create.c:300
#6  0x00c20a4e in clone () at ../sysdeps/unix/sysv/linux/i386/clone.S:130

Thread 7:
#0  0x00d38422 in __kernel_vsyscall ()
#1  0x00ef4f5b in read () at ../sysdeps/unix/syscall-template.S:82
#2  0x08184b3f in read (unused=0x0) at /usr/include/bits/unistd.h:45
#3  signal_dispatcher_thread_func (unused=0x0) at sys/unix/sys.c:2906
#4  0x081e2fb5 in thr_wrapper (vtwd=0xbfeb39a0) at pthread/ethread.c:106
#5  0x00eed96e in start_thread (arg=0xb7582b70) at pthread_create.c:300
#6  0x00c20a4e in clone () at ../sysdeps/unix/sysv/linux/i386/clone.S:130

--that's all--

My summary: one of the interpreter threads has detected an out-of-memory situation during GC and is creating a crash dump (verified: an erl_crash.dump file does exist in the directory and is slowly increasing in size once I detached the debugger); the other interpreter thread is stuck during GC, presumably waiting for the first one.

Denis: Do you see any erl_crash.dump files being created in the apparent deadlock situation?
If so, then it's a simple out-of-memory situation, and the only bug appears to be that the printing of the out-of-memory error report (see thread 3, stack frame #8 above) is blocked for some reason; I have not waited to find out whether it will eventually be printed.
Dumping a ~1GB process obviously takes some time, and is what the one busy CPU is doing.

(I did try lowering the limit of virtual memory the OS process was allowed; then the output is:
  Crash dump was written to: erl_crash.dump
  eheap_alloc: Cannot allocate 32097420 bytes of memory (of type "old_heap").
so it appears it will be printed, but for unknown-but-user-unfriendly reasons not until after dumping has completed.)

Regards,
Erik Søe Sørensen
________________________________________
From: erlang-bugs-bounces@REDACTED [erlang-bugs-bounces@REDACTED] On Behalf Of Denis Nesterov [copypastor@REDACTED]
Sent: 25 November 2011 23:03
To: erlang-bugs@REDACTED
Subject: [erlang-bugs] Simple floating-point lists operation hungs Erlang       system

Hello!

I wrote simple benchmark code to measure floating-point Erlang performance. And seems that code gets Erlang system into a deadlock :-p

The thing is that my code doing VERY simple thing - it first generates big list of pairs of Euclidian vectors, then in loop performs sequential mult-add operation on each element.
When I start one such process per CPU core and list is big enough, the Erlang systems quickly stops responding.

 The code is:

-module ( bug ).
-export ( [start/0] ).

-record ( vec3, { x=0, y=0, z=0 } ).


vec3ma( V0, V1, K ) when is_record( V0, vec3 ) and is_record( V1, vec3 ) and is_number( K ) ->
#vec3{ x = V0#vec3.x + V1#vec3.x * K, y = V0#vec3.y + V1#vec3.y * K, z = V0#vec3.z + V1#vec3.z * K }.

%create test data
makeVecTestList( L, 0 ) -> L;
makeVecTestList( L, N ) ->
E = { #vec3{ x = N * 10.0, y = 20.0, z = 30.0 }, #vec3{ x=10.0, y=20.0, z=-40.0 } },
makeVecTestList( [ E|L ], N - 1 ).

%Test list-processing code
proc( L ) -> proc( L, [] ).
proc( [], Acc ) -> Acc;
proc( [ { P, V } | T ], Acc ) -> proc( T, [ { vec3ma( P, V, 0.11 ), V } | Acc ] ).

%process loop
vecTestLoop( L, T, I ) ->
Now = now(),
DT = timer:now_diff( Now, T ) * 1.0e-3,
io:format( "Step~p: ~p ms\n", [I, DT] ),
vecTestLoop( proc( L ), Now, I ).

start() ->
L = makeVecTestList( [], 1000000 ),
io:format( "Running test with: ~p\n", [length(L)] ),
spawn( fun() -> vecTestLoop( L, now(), 1 ) end ),
spawn( fun() -> vecTestLoop( L, now(), 2 ) end ),
spawn( fun() -> vecTestLoop( L, now(), 3 ) end ),
spawn( fun() -> vecTestLoop( L, now(), 4 ) end ).


I used next test-cases:

AMD 64 2 core, 3.1 GHZ, Windows XP, Erlang R14B04:
- Works well with one process and any list that fits into memory
- Works well with two processes and list of 500000 elements
- Hangs in about 30-60 seconds with list of 750000 elements
- Hangs almost immediately with list of 1000000 elements

Intel Quad 2.67 GHz, Windows 7 x64, Erlang R14B04:
- Works well with one process and any list that fits into memory
- Hangs in about 30-60 seconds with list of 1000000 elements
- Hangs almost immediately with list of 1500000 elements

 This looks very similar to deadlock - when normal iterations freeezes, the erl.exe cannot be stopped with Ctrl+C/Break, typing "q()." etc, and starts to load ONLY ONE CORE. Though while it still alive, it consumes all cores.

Memory consumption by erl.exe was about or even under 1 GB in all cases.




More information about the erlang-bugs mailing list