ETS 'set' table bug ... or not?

Sat Jul 12 20:25:16 CEST 2003

Hi there.  While running some tests on a borrowed Pentium 4 Xeon
RedHat 8.0 Linux box, I encountered a segmentation violation problem
that puzzled me.  I'm not certain if it's a bug in ETS or if it is a
problem with the hardware.  The hardware claims to have ECC memory, so
it's unlikely (but not impossible, I suppose) that bad RAM is causing
the problem.

The code for test.erl is included below.  If I run the command line:

    erl -noinput -s test do set insert_rand 7000 delete_all \
	traverse_forward -s erlang halt

... no problem.  If I use a bigger number than 7000, no problem
... until I use this number: 53999154.  Then I get a segmentation
violation every time, apparently while the 'delete_all' part of the
test is running.

WARNING: The process size grows to about 2.6GB.

I was borrowing CPU cycles on this machine.  Alas, I no longer have
convenient access to that machine.  Could someone else with a 3-4GB
Pentium 4 box check to see if they can reproduce the problem?  It
would be good to know if this is an actual ETS bug.  (I was using
R9B-1 for my testing.)

-Scott

--- snip --- snip --- snip --- snip --- snip --- snip --- snip --- 

-module(test).

-export([do/1]).
%%% Internal exports
-export([
	 delete_all/0, delete_all/2,
	 insert_rand/0, insert_rand/2,
	 traverse_forward/0, traverse_forward/2
	]).
%%% Debugging exports
-export([unatomify/1, test_defined/1]).

%%% We use single arg style for do() so that we can automate via
%%% "erl -s test2 do bag insert_seq 400" or
%%% "erl -s test2 do set insert 11000000 3 traverse_forw lookup_seq 11000000"

do(ArgList) when list(ArgList) ->
    [TabType|AList] = unatomify(ArgList),
    Tab = ets:new(t, [TabType, private]),
    Res = do2(Tab, AList),
    format_results(Res),
    Res.

do2(Tab, AList) ->
    FAndAList = parse_alist(AList),
    [{DoFunc, timer:tc(?MODULE, DoFunc, [Args, Tab])} ||
	{DoFunc, Args} <- FAndAList].

delete_all() ->
    ok.
delete_all([], Tab) ->
    ets:safe_fixtable(Tab, true),
    V = delete_all2(ets:first(Tab), Tab),
    ets:safe_fixtable(Tab, false),
    V.
delete_all2('$end_of_table', Tab) ->
    ok;
delete_all2(Key, Tab) ->
    Next = ets:next(Tab, Key),
    ets:delete(Tab, Key),
    delete_all2(Next, Tab).

insert_rand() ->
    ok.
insert_rand([N], Tab) ->
    insert_rand([N, N], Tab);
insert_rand([Num, MaxKey], Tab) ->
    insert_rand([Num, MaxKey, 1], Tab);
insert_rand([Num, MaxKey, Val], Tab) ->
    insert_rand(Num, MaxKey div 4, make_randfun(MaxKey), Val, Tab).
insert_rand(0, Last, RandFun, Val, Tab) ->
    ok;
insert_rand(N, Last, RandFun, Val, Tab) ->
    New = RandFun(Last),
    true = ets:insert(Tab, {New, Val}),
    insert_rand(N - 1, New, RandFun, Val, Tab).

traverse_forward() ->
    ok.
traverse_forward([], Tab) ->
    traverse_forward2(ets:first(Tab), Tab).
traverse_forward2('$end_of_table', Tab) ->
    ok;
traverse_forward2(Key, Tab) ->
    traverse_forward2(ets:next(Tab, Key), Tab).

%%%
%%% Low-level stuff below here
%%%

do_loop(N, Fun) ->
    do_loop_skip(N, 1, 0, Fun).

do_loop_skip(N, Skip, Stop, _) when N =< Stop ->
    ok;
do_loop_skip(N, Skip, Stop, Fun) ->
    Fun(N),
    do_loop_skip(N - Skip, Skip, Stop, Fun).

%%%
%%% smaller_prime()'s algorithm stolen from erl_db.c.
%%%

smaller_prime(N) when N rem 2 == 0 ->
    smaller_prime(N - 1);
smaller_prime(N) ->
    NModI = smaller_prime_for(3, N),
    if NModI * NModI > N -> N;
       true              -> smaller_prime(N - 2)
    end.

smaller_prime_for(I, N) when I * I =< N ->
    if N rem I == 0 -> I;
       true         -> smaller_prime_for(I + 2, N)
    end;
smaller_prime_for(I, N) ->
    I.

%%% unatomify() can now convert atoms such as '{foo, bar}' and '[1, foo, 3]'

unatomify([]) ->
    [];
unatomify([H|T]) when atom(H) ->
    L = atom_to_list(H),
    [LH|LT] = L,
    if
	LH >= $0, LH =< $9 ->
	    [list_to_integer(L) | unatomify(T)];
	LH == $- ->
	    [list_to_integer(L) | unatomify(T)];
	LH == ${; LH == $[ ->
	    case erl_scan:string(L) of
		{ok, Tokens, _} ->
		    case erl_parse:parse_term(Tokens ++ [{dot, 1}]) of
			{ok, List} when list(List) -> [List | unatomify(T)];
			{ok, Term} -> [Term | unatomify(T)];
			_ -> [H | unatomify(T)]
		    end;
		_ ->
		    [H | unatomify(T)]
	    end;
	true ->
	    [H | unatomify(T)]
    end;
unatomify([H|T]) ->
    [H | unatomify(T)].    

parse_alist([]) ->
    [];
parse_alist([Func|T]) ->
    case test_defined(Func) of
	true ->
	    {Args, Rest} = parse_alist2(T),
	    [{Func, Args} | parse_alist(Rest)];
	false ->
	    throw({bad_funcname, Func})
    end.

parse_alist2(L) ->
    parse_alist2(L, []).
parse_alist2([], Acc) ->
    {lists:reverse(Acc), []};
parse_alist2([H|T] = L, Acc) ->
    case test_defined(H) of
	true ->
	    {lists:reverse(Acc), L};
	false ->
	    parse_alist2(T, [H|Acc])
    end.

test_defined(F) ->
    case catch apply(?MODULE, F, []) of
	ok ->
	    true;
	_ ->
	    false
    end.

%%% This function is fairly fast and typically hits at least 1/2 of
%%% total space from 0 through N, often more.  Careful choices of N
%%% can result in covering all values of 0 through N.

make_randfun(N) ->
    SmallerPrime = smaller_prime(N),
    YetSmallerPrime = smaller_prime(SmallerPrime - 1),
    fun(Old) -> (Old * SmallerPrime) rem YetSmallerPrime end.

format_results([]) ->
    ok;
format_results([H|T]) ->
    case H of
	{Test, {N, ok}} when is_integer(N) ->
	    io:format("~w ok ~w\n", [Test, N]);
	{Test, {N, Huh}} when is_integer(N) ->
	    io:format("~w FAIL ~w : ~w\n", [Test, N, Huh]);
	_ ->
	    foo
    end,
    format_results(T).