[erlang-questions] how: mnesia with simultaneous permanent node failure (EC2)
Paul Mineiro
paul-trapexit@REDACTED
Wed Dec 12 01:31:43 CET 2007
sorry, wrong mnesia_schema.patch attached to the last message.
correct one here.
-- p
On Tue, 11 Dec 2007, Paul Mineiro wrote:
> in order to get my disaster recovery situation managed on EC2, i went
> ahead and wrote mnesia_schema:del_table_copies/2, which allows one to
> remove a table's copies from multiple nodes in one operation.
>
> attached are the patches to mnesia_schema.erl and mnesia_dumper.erl (also,
> to be pleasant should these concepts be accepted, mnesia.erl should be
> patched so that mnesia:del_table_copies/2 exists, but the meat is here).
>
> also attached are the original script indicating "2 out of 3" node failure
> where recovery was not possible, and a new script demonstrating the use of
> the new call.
>
> i'm not an mnesia expert so for all i know this is a really bad idea for
> reasons i'm not seeing at the moment. therefore, any input from mnesia
> gurus would be greatly appreciated.
>
> thanks,
>
> -- p
>
> On Sat, 1 Dec 2007, Paul Mineiro wrote:
>
> > hi.
> >
> > i'm thinking about using mnesia on EC2, but i'm having problems figuring
> > out disaster recovery.
> >
> > some background: on EC2, you can start as many machines (instances) as you
> > like. if you lose one, you can start another, but it'll have a different
> > hostname. when you lose an instance, you lose whatever was stored on the
> > drive.
> >
> > i was trying to figure out what i would do with a distributed mnesia
> > database when i lost a node. i came up with a procedure based upon
> > mnesia:del_table_copy/2 of the schema on the lost node which seems to work
> > (attached as test-disaster-one).
> >
> > however when i tried to apply the procedure to simultaneous loss of two
> > nodes, i ran into a problem; calling mnesia:del_table_copy/2 of schema
> > requires all other nodes to be active, and in this scenario i have lost
> > two nodes simultaneously (attached as test-disaster-two).
> >
> > any input from mnesia gurus would be greatly appreciated.
> >
> > thanks,
> >
> > -- p
> >
> > Optimism is an essential ingredient of innovation. How else can the
> > individual favor change over security?
> >
> > -- Robert Noyce
>
> Optimism is an essential ingredient of innovation. How else can the
> individual favor change over security?
>
> -- Robert Noyce
Optimism is an essential ingredient of innovation. How else can the
individual favor change over security?
-- Robert Noyce
-------------- next part --------------
--- /sw/lib/erlang/lib/mnesia-4.3.5/src/mnesia_schema.erl 2007-03-27 06:37:32.000000000 -0700
+++ mnesia_schema.erl 2007-12-11 16:20:30.000000000 -0800
@@ -40,6 +40,7 @@
cs2list/1,
del_snmp/1,
del_table_copy/2,
+ del_table_copies/2,
del_table_index/2,
delete_cstruct/2,
delete_schema/1,
@@ -104,7 +105,7 @@
do_delete_table_property/2,
do_write_table_property/2]).
--include("mnesia.hrl").
+-include_lib("mnesia/src/mnesia.hrl").
-include_lib("kernel/include/file.hrl").
-import(mnesia_lib, [set/2, del/2, verbose/2, dbg_out/2]).
@@ -1214,6 +1215,86 @@
end
end.
+%% del_table_copies(Tab, Ns)
+%% Runs do_del_table_copies/2 for Tab and the node list Ns inside a schema
+%% transaction and returns whatever schema_transaction/1 returns.
+%% Ns must be a non-empty list; any other argument fails with function_clause.
+del_table_copies(Tab, [_|_] = Ns) ->
+    DropCopies = fun() -> do_del_table_copies(Tab, Ns) end,
+    schema_transaction(DropCopies).
+
+%% Body of the del_table_copies/2 schema transaction: the write lock on the
+%% schema is taken first, then the operations built by
+%% make_del_table_copies/2 are handed to insert_schema_ops/2.
+do_del_table_copies(Tab, Ns) ->
+    Locked = get_tid_ts_and_lock(schema, write),
+    Ops = make_del_table_copies(Tab, Ns),
+    insert_schema_ops(Locked, Ops).
+
+%% make_del_table_copies(Tab, Ns) -> [SchemaOp]
+%%
+%% Builds the schema operations that remove the replicas of Tab held by the
+%% nodes in Ns.  Each node is deleted from the replica list matching the
+%% storage type it has for Tab, working on a version-bumped copy of the
+%% table's cstruct.  What is emitted then depends on whether any replica is
+%% left and on whether Tab is the schema:
+%%   * schema, no replica left     -> abort {combine_error, Tab, "Last replica"}
+%%   * other table, no replica left -> the ops of make_delete_table/2
+%%   * schema, replicas left       -> a del_table_copies op for the schema plus
+%%                                    the ops of remove_nodes_from_tabs/2 run
+%%                                    over the tables under {schema, tables}
+%%   * other table, replicas left  -> a single del_table_copies op
+make_del_table_copies(Tab, Ns) ->
+    ensure_writable(schema),
+    Cs = incr_version(val({Tab, cstruct})),
+    NStorList = lists:map(fun(N) ->
+                                  {N, mnesia_lib:schema_cs_to_storage_type(N, Cs)}
+                          end,
+                          Ns),
+    Strip = fun({N, Storage}, CsAcc) -> new_cs(CsAcc, N, Storage, del) end,
+    Cs2 = lists:foldl(Strip, Cs, NStorList),
+    case {Tab, mnesia_lib:cs_to_nodes(Cs2)} of
+        {schema, []} ->
+            mnesia:abort({combine_error, Tab, "Last replica"});
+        {_, []} ->
+            ensure_active(Cs),
+            dbg_out("Last replica deleted in table ~p~n", [Tab]),
+            make_delete_table(Tab, whole_table);
+        {schema, _} ->
+            ensure_active(Cs2),
+            lists:foreach(fun(N) -> ensure_not_active(Tab, N) end, Ns),
+            verify_cstruct(Cs2),
+            Ops = remove_nodes_from_tabs(val({schema, tables}), NStorList),
+            SchemaOp = {op, del_table_copies, [{N, ram_copies} || N <- Ns], cs2list(Cs2)},
+            [SchemaOp | Ops];
+        {_, _} ->
+            ensure_active(Cs),
+            verify_cstruct(Cs2),
+            [{op, del_table_copies, NStorList, cs2list(Cs2)}]
+    end.
+
+%% remove_nodes_from_tabs(Tabs, NStorList) -> [SchemaOp]
+%%
+%% Produces the schema operations needed to purge the nodes named in
+%% NStorList ([{Node, Storage}]) from every table in Tabs.  The schema table
+%% itself is skipped.
+%%
+%% Only the node names of NStorList are used.  The storage type each node has
+%% for Tab is looked up again from Tab's own cstruct: the types carried in
+%% NStorList were computed by the caller for a different table, and reusing
+%% them would make new_cs/4 delete the node from the wrong replica list
+%% whenever the two tables are stored differently on that node.
+%%
+%% Per table:
+%%   * no listed node holds a replica: change_table_frag ops are emitted if
+%%     mnesia_frag:remove_node/2 reported a change, otherwise nothing;
+%%   * removing the nodes leaves no replica: a delete_table op;
+%%   * otherwise: one del_table_copies op carrying the per-table list.
+remove_nodes_from_tabs([], _NStorList) ->
+    [];
+remove_nodes_from_tabs([schema|Rest], NStorList) ->
+    remove_nodes_from_tabs(Rest, NStorList);
+remove_nodes_from_tabs([Tab|Rest], NStorList) ->
+    Nodes = [N || {N, _} <- NStorList],
+    {Cs, IsFragModified} =
+        lists:foldl(fun(Node, {AccCs, AccIsFragModified}) ->
+                            {NewCs, NewFM} = mnesia_frag:remove_node(Node, AccCs),
+                            {NewCs, NewFM or AccIsFragModified}
+                    end,
+                    {incr_version(val({Tab, cstruct})), false},
+                    Nodes),
+    %% Storage type of Tab on each node, taken from Tab's cstruct after the
+    %% fragment bookkeeping above.
+    TabNStorList = [{N, mnesia_lib:schema_cs_to_storage_type(N, Cs)} || N <- Nodes],
+    case lists:any(fun({_, S}) -> S =/= unknown end, TabNStorList) of
+        false ->
+            case IsFragModified of
+                true ->
+                    [{op, change_table_frag, {del_node, N}, cs2list(Cs)} ||
+                        N <- Nodes] ++
+                        remove_nodes_from_tabs(Rest, NStorList);
+                false ->
+                    remove_nodes_from_tabs(Rest, NStorList)
+            end;
+        true ->
+            Cs2 = lists:foldl(fun({_, unknown}, Acc) -> Acc;
+                                 ({Node, Storage}, Acc) ->
+                                      new_cs(Acc, Node, Storage, del)
+                              end,
+                              Cs,
+                              TabNStorList),
+            case mnesia_lib:cs_to_nodes(Cs2) of
+                [] ->
+                    [{op, delete_table, cs2list(Cs)} |
+                     remove_nodes_from_tabs(Rest, NStorList)];
+                _Ns ->
+                    verify_cstruct(Cs2),
+                    %% NOTE(review): entries with storage 'unknown' stay in the
+                    %% list, as before; confirm the del_table_copies handler in
+                    %% mnesia_dumper tolerates them.
+                    [{op, del_table_copies, TabNStorList, cs2list(Cs2)} |
+                     remove_nodes_from_tabs(Rest, NStorList)]
+            end
+    end.
+
new_cs(Cs, Node, ram_copies, add) ->
Cs#cstruct{ram_copies = opt_add(Node, Cs#cstruct.ram_copies)};
new_cs(Cs, Node, disc_copies, add) ->
@@ -1226,7 +1307,7 @@
Cs#cstruct{disc_copies = lists:delete(Node , Cs#cstruct.disc_copies)};
new_cs(Cs, Node, disc_only_copies, del) ->
Cs#cstruct{disc_only_copies =
- lists:delete(Node , Cs#cstruct.disc_only_copies)};
+ lists:delete(Node , Cs#cstruct.disc_only_copies)};
new_cs(Cs, _Node, Storage, _Op) ->
mnesia:abort({badarg, Cs#cstruct.name, Storage}).
@@ -1865,6 +1946,29 @@
{true, optional}
end;
+prepare_op(Tid, {op, del_table_copies, NStorList, TabDef}, _WaitFor) -> % prepare step for a del_table_copies schema op
+ Cs = list2cs(TabDef), % rebuild the cstruct from its list form
+ Tab = Cs#cstruct.name,
+
+ if
+ %% Schema table lock is always required to run a schema op.
+ %% No need to lock it.
+ node(Tid#tid.pid) == node(), Tab /= schema -> % the Tid's pid is local and the table is not the schema
+ Self = self(),
+ Pid = spawn_link(fun() -> lock_del_table(Tab, [N || {N,_}<-NStorList], Cs, Self) end), % linked helper runs lock_del_table/4 with the listed node names and our pid
+ put(mnesia_lock, Pid), % helper pid is kept in the process dictionary under mnesia_lock
+ receive
+ {Pid, updated} -> % helper reported success
+ {true, optional};
+ {Pid, FailReason} -> % any other message from the helper is used as the abort reason
+ mnesia:abort(FailReason);
+ {'EXIT', Pid, Reason} -> % NOTE(review): only arrives as a message if this process traps exits - confirm
+ mnesia:abort(Reason)
+ end;
+ true -> % Tid's pid is remote, or the table is the schema: nothing to lock here
+ {true, optional}
+ end;
+
prepare_op(_Tid, {op, change_table_copy_type, N, FromS, ToS, TabDef}, _WaitFor)
when N == node() ->
Cs = list2cs(TabDef),
@@ -2108,7 +2212,21 @@
unlink(whereis(mnesia_tm)),
exit(normal).
-set_where_to_read(Tab, Node, Cs) ->
+set_where_to_read(Tab, NodeList, Cs) when is_list(NodeList) -> % list variant: acts only when the current read node of Tab is in NodeList
+ case lists:member(mnesia_lib:val({Tab, where_to_read}), NodeList) of % is the node currently used for reads one of NodeList?
+ true ->
+ case Cs#cstruct.local_content of
+ true -> % local_content table: where_to_read is left untouched
+ ok;
+ false ->
+ mnesia_lib:set_remote_where_to_read(Tab, NodeList), % presumably NodeList is the set of nodes to avoid when picking a new read node - TODO confirm
+ ok
+ end;
+ _ -> % current read node is not in NodeList: nothing to do
+ ok
+ end;
+
+set_where_to_read(Tab, Node, Cs) when is_atom(Node) ->
case mnesia_lib:val({Tab, where_to_read}) of
Node ->
case Cs#cstruct.local_content of
@@ -2264,6 +2382,16 @@
Tab = Cs#cstruct.name,
mnesia_lib:set({Tab, where_to_read}, Node);
+%% Undo of the prepare step for a del_table_copies op.  When the local node
+%% is one of the nodes named in NStorList, where_to_read for the table is set
+%% back to the local node; on any other node this is a no-op.
+undo_prepare_op(_Tid, {op, del_table_copies, NStorList, TabDef}) ->
+    Here = node(),
+    Affected = [N || {N, _} <- NStorList],
+    case lists:member(Here, Affected) of
+        false ->
+            ok;
+        true ->
+            Cs = list2cs(TabDef),
+            Tab = Cs#cstruct.name,
+            mnesia_lib:set({Tab, where_to_read}, Here)
+    end;
+
undo_prepare_op(_Tid, {op, change_table_copy_type, N, FromS, ToS, TabDef})
when N == node() ->
More information about the erlang-questions
mailing list