[erlang-questions] Erlang shows its slow face!
Edmond Begumisa
<>
Sun Nov 14 01:33:25 CET 2010
PARALLELISE, PARALLELISE, PARALLELISE!!!
I took your second algo and Tony's improved version (both pretty much
unchanged) and parallelised them below with little effort. Permutations
for B and C are calculated on a separate concurrent* process for every A
then the result is combined (e.g when N = 300, 300 concurrent worker
processes are spawned.)
I consistently obtained a 40-45% improvement in execution time regardless
of algo. I also noticed that non-parallel versions were using only ONE of
my cores whereas parallel versions were using BOTH my cores. Here are the
results for N = 300 (debug_info BEAM complied)...
Sequential Concurrent
Your original 8.44s N/A
Your py2 9.86s 5.51s
Tony's pythag2 1.95s 1.17s**
* This is just to illustrate that with any language, one should focus on
it's promises/strengths. Sometimes this can even de-emphasise it's
weaknesses. Though I still think it's best to use a tool that makes
pledges towards the problem area in the first place.
** This is probably roughly as fast the C# implementation but that's
besides the point!
I agree with Bernard. IMO, you're barking up the wrong tree for doing this
sort of thing quickly using either Erlang or C#. When you look closer, the
bottleneck with all the solutions so far isn't the calculation itself (I
was wrong about that earlier) -- it's actually the permutation (the part
done with accumulators/generators). Possibly a language with pointers
would be better at doing this fast since you'd be able to allocate a
fixed-size destructive memory structure to walk through and modify (that's
just an educated guess). I'd put my money on the horse named C NIF.
Anyways, here's the parallelised code...
NB: lpmap is a library function best stuck in a separate utility module.
It's my modified + edocumented version of Joe Armstrong's pmap from his
book "Programming Erlang: Software for a Concurrent World" Section 20.2
"Parallelizing Sequential Code" (go buy it!) The original source is at
http://www.pragprog.com/titles/jaerlang/source_code/
---- START CODE ----
py2(Max)->
lists:flatten(lpmap(fun(A) ->
forb(A, 1, [], Max)
end, lists:seq(1, Max), ordered)).
forb(A, B, Acc, Max) ->
Acc1 = forc(A, B, 1, Acc, Max),
case B < Max of
true -> forb(A, B+1, Acc1, Max);
false -> Acc1
end.
forc(A, B, C, Acc, Max) ->
Acc1 = case (A*A + B*B =:= C*C) andalso (A+B+C =< Max) of
true -> [{A,B,C}|Acc];
_ -> Acc
end,
case C < Max of
true-> forc(A, B, C+1, Acc1, Max);
false-> Acc1
end.
pythag2(N)->
lists:flatten(lpmap(fun(A) ->
pythan2_B(A, 1, N, [])
end, lists:seq(1, N), ordered)).
pythan2_A(A, N, Acc) when A > N -> Acc;
pythan2_A(A, N, Acc) -> pythan2_A(A+1,N,pythan2_B(A, 1, N, Acc)).
pythan2_B(A, B, N, Acc) when A+B > N -> Acc;
pythan2_B(A, B, N, Acc) -> pythan2_B(A,B+1,N,pythan2_C(A, B, 1, N, Acc)).
pythan2_C(A, B, C, N, Acc) when A+B+C > N -> Acc;
pythan2_C(A, B, C, N, Acc) ->
if A*A+B*B =:= C*C ->
pythan2_C(A, B, C+1, N, [{A,B,C}|Acc]);
true ->
pythan2_C(A, B, C+1, N, Acc)
end.
%% @spec lpmap(fun(), list(), (atom() = ordered|unordered)) -> list()
%% @doc Spawns a process for each element in list L, performs specified
%% function F against each in parallel and then returns results
either
%% same order as L (ordered) or in any order (unordered).
%% NB: See also lpmap/4.
lpmap(F, L, ordered) ->
Ref = erlang:make_ref(),
Pids = [lpmap_spawn_link(self(), Ref, F, I) || I <- L],
lpmap_gather_ordered(Pids, Ref, [], 0, void);
lpmap(F, L, unordered) ->
Ref = erlang:make_ref(),
lists:foreach(fun(I) ->
lpmap_spawn_link(self(), Ref, F, I)
end, L),
lpmap_gather_unordered(length(L), Ref, [], 0, void).
%% @spec lpmap(fun(), integer(), list(), (atom() = ordered|unordered))
-> list()
%% @doc Same as lpmap/3 except ensures only a maximum of MaxPs parallel
%% processes execute function F at any one time (i.e. first takes
MaxPs
%% items from list, executes F in parallel against each, then as
each
%% process returns, spawns another process on next item in L as
long as
%% active processes are less than MaxPs).
%% NB: See also lpmap/4.
lpmap(F, L, MaxPs, ordered) when MaxPs>0 ->
Ref = erlang:make_ref(),
{HPids, TPids} = if
length(L) > MaxPs -> lists:split(MaxPs, L);
true -> {L, []}
end,
Pids = [lpmap_spawn_link(self(), Ref, F, I) || I <- HPids],
lpmap_gather_ordered(Pids, Ref, TPids, MaxPs, F);
lpmap(F, L, MaxPs, unordered) when MaxPs>0 ->
Ref = erlang:make_ref(),
{HPids, TPids} = if
length(L) > MaxPs -> lists:split(MaxPs, L);
true -> {L, []}
end,
lists:foreach(fun(I) ->
lpmap_spawn_link(self(), Ref, F, I)
end, HPids),
lpmap_gather_unordered(length(HPids), Ref, TPids, MaxPs, F).
%% lpmap internal functions
lpmap_spawn_link(Parent, Ref, F, I) ->
spawn_link(fun() ->
Parent ! {self(), Ref, F(I)}
end).
lpmap_gather_ordered([], _Ref, [], _MaxPs, _F) ->
[];
lpmap_gather_ordered([HPid|TPids], Ref, L, MaxPs, F) ->
receive
{HPid, Ref, Ret} when length(TPids)<MaxPs, L=/=[] ->
[H | T] = L,
[Ret | lpmap_gather_ordered(
lists:append(TPids, [lpmap_spawn_link(self(), Ref, F, H)]),
Ref, T, MaxPs, F)];
{HPid, Ref, Ret} ->
[Ret | lpmap_gather_ordered(TPids, Ref, L, MaxPs, F)]
end.
lpmap_gather_unordered(0, _Ref, [], _MaxPs, _F) ->
[];
lpmap_gather_unordered(NPs, Ref, L, MaxPs, F) ->
receive
{_Pid, Ref, Ret} when NPs-1<MaxPs, L=/=[] ->
[H | T] = L,
lpmap_spawn_link(self(), Ref, F, H),
[Ret | lpmap_gather_unordered(NPs, Ref, T, MaxPs, F)];
{_Pid, Ref, Ret} ->
[Ret | lpmap_gather_unordered(NPs-1, Ref, L, MaxPs, F)]
end.
---- END CODE -----
- Edmond -
On Sun, 14 Nov 2010 02:59:18 +1100, Tony Rogvall <> wrote:
> Hi Gilberto!
>
> I did some rewrite of the FORM 2, that I think is more compatible with
> the condition that A+B+C =< N,
> and the result is of course much better. How did you implement the C#
> version ?
>
> My results for N = 300, times are measured with timer:tc.
>
> pythag0: 4837528 (4.8s)
> pythag1: 4289769 (4.3s)
> pythag2: 703632 (0.7s)
>
> And the result with +native
>
> pythag0: 1006865 (1.0s)
> pythag1: 441254 (0.4s)
> pythag2: 107387 (0.1s)
>
> /Tony
>
> My versions (original + the version that only generates the list once)
>
> -module(pythag).
>
> -compile(export_all).
>
> pythag0(N) ->
> [ {A,B,C} ||
> A <- lists:seq(1,N),
> B <- lists:seq(1,N),
> C <- lists:seq(1,N),
> A+B+C =< N,
> A*A+B*B =:= C*C].
>
> pythag1(N) ->
> L = lists:seq(1,N),
> [ {A,B,C} ||
> A <- L,
> B <- L,
> C <- L,
> A+B+C =< N,
> A*A+B*B =:= C*C].
>
>
> pythag2(N) ->
> lists:reverse(pythan2_A(1, N, [])).
>
> pythan2_A(A, N, Acc) when A > N -> Acc;
> pythan2_A(A, N, Acc) -> pythan2_A(A+1,N,pythan2_B(A, 1, N, Acc)).
>
> pythan2_B(A, B, N, Acc) when A+B > N -> Acc;
> pythan2_B(A, B, N, Acc) -> pythan2_B(A,B+1,N,pythan2_C(A, B, 1, N, Acc)).
>
> pythan2_C(A, B, C, N, Acc) when A+B+C > N -> Acc;
> pythan2_C(A, B, C, N, Acc) ->
> if A*A+B*B =:= C*C ->
> pythan2_C(A, B, C+1, N, [{A,B,C}|Acc]);
> true ->
> pythan2_C(A, B, C+1, N, Acc)
> end.
>
>
>
>
>
> On 13 nov 2010, at 08.37, Gilberto Carmenate García wrote:
>
>> Hi all!
>> I have been doing tests to Erlang I found this funny stuff that makes
>> Pythagorean Triplets
>>
>> pythag(N) ->
>> [ {A,B,C} ||
>> A <- lists:seq(1,N),
>> B <- lists:seq(1,N),
>> C <- lists:seq(1,N),
>> A+B+C =< N,
>> A*A+B*B =:= C*C].
>>
>> I tested it agains an implementation I made in C# so, and takes 14
>> secounds in my pc to do with 300 numbers in Erlang however in c# is just
>> a secound, even when C# runs under VM too.
>> So I did all possible ways for me to implement differents manners in
>> Erlang looking for speed and all is the same, listed as follows:
>>
>> So my question is, there are any way to do that even more fast, why 3
>> nestes fors structs in C# are more effients that lists:foldr or
>> lists:foreach in Erlang.
>>
>> %% FORMA 1
>> py1(Max)->
>> L = lists:seq(1, Max),
>> lists:foldr(
>> fun(A, Acc3)->
>> lists:foldr(
>> fun(B, Acc2)->
>> lists:foldr(
>> fun(C, Acc)->
>> case ((A*A + B*B =:= C*C) andalso (A+B+C =<
>> Max)) of
>> true->
>> [{A,B,C}|Acc];
>> false->
>> Acc
>> end
>> end
>> , Acc2, L)
>> end
>> , Acc3, L)
>> end
>> , [], L).
>>
>>
>>
>> %% FORMA 2
>> py2(Max)->
>> fora(1, [], Max).
>>
>> fora(A, Acc, Max)->
>> Acc1 = forb(A,1, Acc, Max),
>> case A < Max of
>> true->
>> fora(A+1, Acc1, Max);
>> false->
>> Acc1
>> end.
>>
>> forb(A,B, Acc, Max)->
>> Acc1 = forc(A,B,1, Acc, Max),
>> case B < Max of
>> true->
>> forb(A,B+1, Acc1, Max);
>> false->
>> Acc1
>> end.
>>
>> forc(A,B,C, Acc, Max)->
>> Acc1 = case (A*A + B*B =:= C*C) andalso (A+B+C =< Max) of
>> true->
>> [{A,B,C}|Acc];
>> _->
>> Acc
>> end,
>> case C < Max of
>> true->
>> forc(A,B,C+1, Acc1, Max);
>> false->
>> Acc1
>> end.
>>
>> %% FORMA 3.
>> py3(Max)->
>> [{A,B,C} ||
>> A <-lists:seq(1, Max),
>> B <-lists:seq(1, Max),
>> C <-lists:seq(1, Max),
>> A*A + B*B =:= C*C,
>> A+B+C =< Max].
>>
>>
>> =======================================================================
>> Este mensaje ha sido enviado mediante el servicio de correo electronico
>> que ofrece la Federacion de Radioaficionados de Cuba a sus miembros
>> para respaldar el cumplimiento de los objetivos de la organizacion y su
>> politica informativa. La persona que envia este correo asume el
>> compromiso de usar el servicio a tales fines y cumplir con las
>> regulaciones establecidas.
>>
>>
>>
>> ________________________________________________________________
>> erlang-questions (at) erlang.org mailing list.
>> See http://www.erlang.org/faq.html
>> To unsubscribe; mailto:
>>
>
> "Have run Make so many times I dunno what's installed anymore"
>
--
Using Opera's revolutionary e-mail client: http://www.opera.com/mail/
More information about the erlang-questions
mailing list