Process unexpectedly locks up

Doug Edmunds (gmail) dougedmunds@REDACTED
Tue May 4 06:52:28 CEST 2010


Hello,

I'm posting a module (conn3.erl) below.
This module builds a hierarchical tree of PIDs.
There are two loops, one for entries, and another
for the position in the tree(called 'me').

Each 'entry' runs a copy of the entry_loop.
Each entry keeps track of its parent (one pid)
and its children (list of 0 or more pids). Entries
are not called directly.

The me process runs in the me_loop. It manages the
entries, and moves up and down in the tree, via:
me ! add, me ! up, me ! down, me ! del, me ! show.

The bug I've encountered is when trying to move down the
tree when there are multiple children. Here's the basic
scenario:
After running conn3:start(),
type:
     me ! add.
     me ! add.
to create two children.

Now type:
     me ! down.

Because there is more than one child, the code calls
indexlist/3, which returns a list of tuples:
[{1, PID1}, {2, PID2}, ...].
Then the next line in the 'down' message prints
that list.

After that the user is supposed to pick the child
by using the integer:

    Input = get_user_input("Enter key: "),

But the pid  never gets to that line.  I get no error message.

It fails both in Windows XP and in a linux os.

If someone can figure out how to allow user input of a
selection, much appreciated.

Doug Edmunds

---------------------------

I've received this reply from Sam Bobroff, which indicates
there is some kind of conflict with "io:wait_io_mon_reply".


[Sam wrote:]

Getting a backtrace often helps. This is what I did:

$ erl
Erlang R13B03 (erts-5.7.4) [source] [smp:2:2] [rq:2] [async-threads:0]
[kernel-poll:false]

Eshell V5.7.4  (abort with ^G)
1> conn3_full:start().
- --setting me to top: self(): <0.34.0> M: <0.35.0> K: [] P: []
- --setting top self(): <0.35.0> M: <0.35.0> K: [] P []
ok
- --show self():<0.34.0> M: <0.35.0> K: [<0.37.0>,<0.36.0>] P: []
2> me ! down.
[{1,<0.37.0>},{2,<0.36.0>}]
down
3> {backtrace, BT} = process_info(whereis(me), backtrace).
{backtrace,<<"Program counter: 0x0079f3c8 (io:wait_io_mon_reply/2 +
28)\nCP: 0x00000000 (invalid)\narity = 0\n\n0x002f6cbc Ret"...>>}
4> io:fwrite("~s\n", [binary_to_list(BT)]).
Program counter: 0x0079f3c8 (io:wait_io_mon_reply/2 + 28)
CP: 0x00000000 (invalid)
arity = 0

0x002f6cbc Return addr 0x007a14c0 (conn3_full:get_user_input/1 + 20)
y(0)     #Ref<0.0.0.37>
y(1)     <0.25.0>

0x002f6cc8 Return addr 0x007a0bf4 (conn3_full:me_loop/3 + 676)

0x002f6ccc Return addr 0x001a1df4 (<terminate process normally>)
y(0)     []
y(1)     [{1,<0.37.0>},{2,<0.36.0>}]
y(2)     []
y(3)     [<0.37.0>,<0.36.0>]
y(4)     <0.35.0>

ok

I can see that "me" is still in its loop and that it's currently in
"io:wait_io_mon_reply". I don't know exactly what this function is but
my guess would be it's something to do with the shell and io:get_line
(actually wait_io_mon_reply) fighting over the terminal input. If we try
again with -noshell it might be better but then we won't be able to use
the shell to send messages to "me".

So, I modified the source to add "me ! down" in the set up sequence at
line 16, and also uncommented the debug at the top of me_loop, and now I
get:

$ erl -noshell -run conn3_full
- --me_loop self(): <0.29.0> M: [] K:[] P: []
- --setting top self(): <0.30.0> M: <0.30.0> K: [] P []
- --setting me to top: self(): <0.29.0> M: <0.30.0> K: [] P: []
- --me_loop self(): <0.29.0> M: <0.30.0> K:[] P: []
- --me_loop self(): <0.29.0> M: <0.30.0> K:[<0.31.0>] P: []
- --me_loop self(): <0.29.0> M: <0.30.0> K:[<0.32.0>,<0.31.0>] P: []
- --show self():<0.29.0> M: <0.30.0> K: [<0.32.0>,<0.31.0>] P: []
- --me_loop self(): <0.29.0> M: <0.30.0> K:[<0.32.0>,<0.31.0>] P: []
[{1,<0.32.0>},{2,<0.31.0>}]
Enter key: 2
- --me_loop self(): <0.29.0> M: <0.30.0> K:[<0.32.0>,<0.31.0>] P: []
- --me_loop self(): <0.29.0> M: <0.31.0> K:[] P: <0.30.0>

I entered "2" at the prompt and the loop has continued :-)

---  End ------------------------


-module(conn3_full).
-compile(export_all).

%% usage: conn3_full:start().
%% then send messages to 'me' (see me_loop)

%% Boots the demo: spawns the navigator process and the root entry,
%% registers them as `me' and `top', and sends each one its
%% {first_time} initialisation message. Returns ok.
start() ->
%    process_flag(trap_exit,  true),
    register(me, spawn(?MODULE, me_loop, [[], [], []])),
    register(top, spawn(?MODULE, entry_loop, [[], [], []])),
    me ! {first_time},
    top ! {first_time},
% uncomment this next line to get to the problem faster
%    me ! add, me ! add, me! show,
    ok.

%% Receive loop of the navigator process registered as `me'.
%%
%% State (a copy of the entry `me' is currently positioned on):
%%   M - pid of the current entry
%%   K - list of the current entry's child pids
%%   P - pid of the current entry's parent, or [] when there is none
%%
%% Messages handled:
%%   {first_time}    - position on the process registered as `top'
%%   show            - print the current state
%%   add             - spawn a new child entry under the current entry
%%   del             - splice the current entry out of the tree and move up
%%   down            - move to a child; with several children the user is
%%                     prompted on standard input for an index
%%   {down, Index}   - move to the child with the given 1-based index
%%                     without reading standard input
%%   up              - move to the parent
%%   {info_requested, M2, K2, P2} - reply from an entry; becomes the new state
%%   die             - print a notice and exit with reason "killed"
%%   anything else   - printed and ignored
%%
%% NOTE(review): the backtrace in the accompanying thread shows `me' parked
%% inside io:get_line when `down' is sent from the interactive shell,
%% presumably because the shell is also reading the terminal. `{down, Index}'
%% is provided so a child can be selected from the shell without that read.
me_loop(M, K, P) ->
%    io:format("--me_loop self(): ~p M: ~p K:~p P: ~p~n",[self(),M,K,P]),
    receive
        {first_time} ->
            %% Look the name up once so the printed pid and the new state agree.
            Top = whereis(top),
            io:format("--setting me to top: self(): ~p M: ~p K: ~p P: ~p ~n",
                      [self(), Top, K, P]),
            me_loop(Top, K, P);

        show ->
            io:format("--show self():~p M: ~p K: ~p P: ~p~n", [self(), M, K, P]),
            me_loop(M, K, P);

        add ->
            %% Create the entry and tell it its own pid.
            Pid = spawn(?MODULE, entry_loop, [[], [], M]),
            Pid ! {set_pid, Pid},
            %% Update the entry that 'me' is copying, then the local copy.
            M ! {p_add_kid, Pid},
            me_loop(M, [Pid | K], P);

        del ->
            case P of
                [] ->
                    io:format("--At the top~n");
                _ ->
                    P ! {p_update_kids, M, K},
                    ok = connect_kids_to_P(K, P),
                    M ! die,
                    %% State is refreshed when the queued `up' is processed.
                    me ! up
            end,
            me_loop(M, K, P);

        down ->
            me_down(K),
            me_loop(M, K, P);

        {down, Index} when is_integer(Index) ->
            me_request_kid(indexlist(1, K, []), Index),
            me_loop(M, K, P);

        up ->
            case P of
                [] -> io:format("--At the top~n");
                _ -> P ! {self(), info_request}
            end,
            me_loop(M, K, P);

        {info_requested, M2, K2, P2} ->
            me_loop(M2, K2, P2);

        die ->
            %% Print before exiting; nothing after exit/1 would ever run.
            io:format("~p died~n", [self()]),
            exit("killed");

        Anything ->
            io:format("--me_loop got this:~p~n", [Anything]),
            me_loop(M, K, P)
    end.

%% Handles `down': no children, exactly one child, or a prompted choice.
me_down([]) ->
    io:format("--No kids~n");
me_down([OnlyKid]) ->
    OnlyKid ! {self(), info_request};
me_down(Kids) ->
    Indexed = indexlist(1, Kids, []),
    ok = io:format("~p~n", [Indexed]),
    Input = get_user_input("Enter key: "),
    %% Accept the input only if the whole string is an integer.
    case string:to_integer(Input) of
        {Int, []} when is_integer(Int) ->
            me_request_kid(Indexed, Int);
        _ ->
            io:format("must enter an integer~n")
    end.

%% Asks the child stored under Index for its state, or reports a bad index.
me_request_kid(Indexed, Index) ->
    case pick_pid(Indexed, Index) of
        Pick when is_pid(Pick) ->
            Pick ! {self(), info_request};
        _ ->
            io:format("that number is not on the list~n")
    end.



%% Receive loop run by every entry (node) of the tree.
%%
%% State:
%%   M - the pid this entry reports as itself (set by {first_time} or
%%       {set_pid, Pid})
%%   K - list of child pids
%%   P - parent pid, or [] when there is none
%%
%% Messages handled:
%%   {first_time}      - set M to the process registered as `top'
%%   show              - print the current state
%%   {set_pid, Pid}    - set M to Pid
%%   {From, info_request} - reply to From with {info_requested, M, K, P}
%%   {p_update_kids, Kid, GrandKidsList}
%%                     - remove Kid from K and put GrandKidsList in front
%%   {kid_change_p, GrandP} - replace the parent with GrandP
%%   {p_add_kid, Pid}  - prepend Pid to K
%%   die               - print a notice and exit with reason "killed"
%%   anything else     - printed and ignored
%%
%% A {tell_kids_about_Pid, Pid, Msg} broadcast is not implemented.
entry_loop(M, K, P) ->
%    io:format("--entry_loop self(): ~p M: ~p K:~p P: ~p~n",[self(),M,K,P]),
    receive
        {first_time} ->
            %% Look the name up once so the printed pid and the new state agree.
            Top = whereis(top),
            io:format("--setting top self(): ~p M: ~p K: ~p P ~p ~n",
                      [self(), Top, K, P]),
            entry_loop(Top, K, P);

        show ->
            io:format("--show self():~p M: ~p K: ~p P: ~p~n", [self(), M, K, P]),
            entry_loop(M, K, P);

        {set_pid, Pid} ->
            entry_loop(Pid, K, P);

        {From, info_request} ->
            From ! {info_requested, M, K, P},
            entry_loop(M, K, P);

        {p_update_kids, Kid, GrandKidsList} ->
            %% The removed kid's own children are adopted by this entry.
            entry_loop(M, GrandKidsList ++ lists:delete(Kid, K), P);

        {kid_change_p, GrandP} ->
            entry_loop(M, K, GrandP);

        {p_add_kid, Pid} ->
            entry_loop(M, [Pid | K], P);

        die ->
            %% Print before exiting; nothing after exit/1 would ever run.
            io:format("~p died~n", [self()]),
            exit("killed");

        Anything ->
            io:format("--entry_loop Got this:~p~n", [Anything]),
            entry_loop(M, K, P)
    end.

%% Pairs each element of the list with a running index beginning at Start.
%% Out is an accumulator of already-built pairs in reverse order; the
%% result is the reversed accumulator, so callers normally pass [].
indexlist(Start, Items, Out) ->
    Number = fun(Item, {Acc, Index}) ->
                 {[{Index, Item} | Acc], Index + 1}
             end,
    {Pairs, _NextIndex} = lists:foldl(Number, {Out, Start}, Items),
    lists:reverse(Pairs).

%% Looks Key up in a list of {Key, Value} pairs.
%% Returns the stored value, or the string "no such key" when Key is absent.
pick_pid(Out, Key) ->
    case dict:find(Key, dict:from_list(Out)) of
        {ok, Found} -> Found;
        error -> "no such key"
    end.

%% Prints Prompt, reads one line from standard input and returns it with
%% trailing line-feeds and surrounding spaces removed.
%% io:get_line/1 can return eof or {error, Reason} instead of a line;
%% both are reported as the empty string rather than crashing in
%% string:strip, so the caller treats them like any other invalid entry.
get_user_input(Prompt) ->
    case io:get_line(Prompt) of
        eof ->
            "";
        {error, _Reason} ->
            "";
        Line ->
            string:strip(                       % spaces, front and back
              string:strip(Line, right, $\n))   % line-feed at the end
    end.


%% Sends {kid_change_p, P} to every pid in K, in list order. Returns ok.
connect_kids_to_P(K, P) ->
    lists:foreach(fun(Kid) -> Kid ! {kid_change_p, P} end, K).

%%%not implemented
% tell_list([],_,_) ->  ok;
% tell_list([H|T],X,Msg) -> H ! {Msg, X}, tell_list(T,X, Msg).


%%%macro-ish utility
%% Shell convenience: takes the printed form of a pid, e.g. "<0.35.0>",
%% and reports whether that process is alive.
b_alive(String) ->
    Pid = list_to_pid(String),
    is_process_alive(Pid).


More information about the erlang-questions mailing list