[erlang-questions] how: mnesia with simultaneous permanent node failure (EC2)
Paul Mineiro
paul-trapexit@REDACTED
Sat Dec 1 19:49:31 CET 2007
hi.
i'm thinking about using mnesia on EC2, but i'm having problems figuring
out disaster recovery.
some background: on EC2, you can start as many machines (instances) as you
like. if you lose one, you can start another, but it'll have a different
hostname. when you lose an instance, you lose whatever was stored on the
drive.
i was trying to figure out what i would do with a distributed mnesia
database when i lost a node. i came up with a procedure based upon
mnesia:del_table_copy/2 of the schema on the lost node which seems to work
(attached as test-disaster-one).
however when i tried to apply the procedure to simultaneous loss of two
nodes, i ran into a problem; calling mnesia:del_table_copy/2 of schema
requires all other nodes to be active, and in this scenario i have lost
two nodes simultaneously (attached as test-disaster-two).
any input from mnesia gurus would be greatly appreciated.
thanks,
-- p
Optimism is an essential ingredient of innovation. How else can the
individual favor change over security?
-- Robert Noyce
-------------- next part --------------
#! /bin/sh
# test-disaster-one: simulate losing, and then recovering, a single EC2 node
# out of a three-node Mnesia pool (flassy, turgy, warezy).
#
# Recovery procedure exercised below:
#   1. once the node has died, a survivor calls
#      mnesia:del_table_copy (schema, LostNode)
#   2. the Mnesia directory of the lost node is deleted (on EC2 this happens
#      automatically, as there is no persistent disk)
#   3. the node is restarted
#   4. mnesia:add_table_copy is called for the recovered node to reinstall
#      its tables
#
# Begin from a clean slate: drop any Mnesia directory left by an earlier run.
for node in flassy turgy warezy
do
  rm -rf Mnesia*"$node"*
done
# Bring up the pool one node at a time so they all end up sharing one schema.
#
# flassy goes first: it registers its eval process as hello, turns its schema
# into a disc copy, answers a single ruthere probe with imok and then idles
# forever in the background.
erl -setcookie mega -sname flassy -s mnesia \
    -noshell -noinput -eval '
  Self = node (),
  true = erlang:register (hello, self ()),
  { atomic, ok } = mnesia:change_table_copy_type (schema, Self, disc_copies),
  receive
    { Caller, ruthere } -> Caller ! imok
  end,
  receive
  after infinity -> ok
  end
' &
# wazzup is a throwaway node used as a barrier: it waits one second, pings
# flassy, looks up the process registered as hello there and blocks until that
# process replies imok (which flassy only does after its schema change).
erl -setcookie mega -sname wazzup \
    -noshell -noinput -eval '
  ok = timer:sleep (1000),
  "wazzup@" ++ Domain = atom_to_list (node ()),
  Flassy = list_to_atom ("flassy@" ++ Domain),
  pong = net_adm:ping (Flassy),
  Hello = rpc:call (Flassy, erlang, whereis, [ hello ]),
  Hello ! { self (), ruthere },
  receive
    imok -> ok
  end
' -s erlang halt
# turgy joins next: it connects to flassy, hands every connected node to
# Mnesia as extra_db_nodes, makes its own schema a disc copy, then answers one
# ruthere probe and idles forever in the background.
erl -setcookie mega -sname turgy -s mnesia \
    -noshell -noinput -eval '
  true = erlang:register (hello, self ()),
  "turgy@" ++ Domain = atom_to_list (node ()),
  Flassy = list_to_atom ("flassy@" ++ Domain),
  pong = net_adm:ping (Flassy),
  { ok, _ } = mnesia:change_config (extra_db_nodes, nodes ()),
  { atomic, ok } = mnesia:change_table_copy_type (schema, node (), disc_copies),
  receive
    { Caller, ruthere } -> Caller ! imok
  end,
  receive
  after infinity -> ok
  end
' &
# Barrier: block the script until the hello process on turgy answers imok.
erl -setcookie mega -sname wazzup \
    -noshell -noinput -eval '
  ok = timer:sleep (1000),
  "wazzup@" ++ Domain = atom_to_list (node ()),
  Turgy = list_to_atom ("turgy@" ++ Domain),
  pong = net_adm:ping (Turgy),
  Hello = rpc:call (Turgy, erlang, whereis, [ hello ]),
  Hello ! { self (), ruthere },
  receive
    imok -> ok
  end
' -s erlang halt
# warezy joins last: it connects to both flassy and turgy, joins the shared
# schema as a disc copy and creates the flass table with a disc copy on all
# three nodes, then answers one ruthere probe and idles in the background.
erl -setcookie mega -sname warezy -s mnesia \
    -noshell -noinput -eval '
  true = erlang:register (hello, self ()),
  "warezy@" ++ Domain = atom_to_list (node ()),
  Pool = [ list_to_atom (Name ++ Domain)
           || Name <- [ "flassy@", "turgy@", "warezy@" ] ],
  [ Flassy, Turgy, _Warezy ] = Pool,
  pong = net_adm:ping (Flassy),
  pong = net_adm:ping (Turgy),
  { ok, _ } = mnesia:change_config (extra_db_nodes, nodes ()),
  { atomic, ok } = mnesia:change_table_copy_type (schema, node (), disc_copies),
  { atomic, ok } = mnesia:create_table (flass, [ { disc_copies, Pool } ]),
  receive
    { Caller, ruthere } -> Caller ! imok
  end,
  receive
  after infinity -> ok
  end
' &
# Barrier: block the script until the hello process on warezy answers imok,
# which it only does once the flass table has been created.
erl -setcookie mega -sname wazzup \
    -noshell -noinput -eval '
  ok = timer:sleep (1000),
  "wazzup@" ++ Domain = atom_to_list (node ()),
  Warezy = list_to_atom ("warezy@" ++ Domain),
  pong = net_adm:ping (Warezy),
  Hello = rpc:call (Warezy, erlang, whereis, [ hello ]),
  Hello ! { self (), ruthere },
  receive
    imok -> ok
  end
' -s erlang halt
# Simulate the disaster: halt flassy, then have warezy remove the schema copy
# of flassy from the cluster (step 1 of the recovery procedure). The result of
# the halt call is deliberately ignored; the schema removal must succeed.
erl -setcookie mega -sname killah \
    -noshell -noinput -eval '
  "killah@" ++ Domain = atom_to_list (node ()),
  Flassy = list_to_atom ("flassy@" ++ Domain),
  _ = rpc:call (Flassy, erlang, halt, []),
  Warezy = list_to_atom ("warezy@" ++ Domain),
  { atomic, ok } = rpc:call (Warezy, mnesia, del_table_copy, [ schema, Flassy ])
' -s erlang halt
# Step 2: the lost instance takes its disk with it, so wipe its Mnesia directory.
rm -rf Mnesia*"flassy"*
# Steps 3 and 4: bring flassy back with an empty Mnesia directory, reconnect
# it to the two survivors, rejoin the schema as a disc copy and re-add the
# flass table (as ram_copies). mnesia:system_info () is the final call before
# the node halts, presumably to dump the resulting state.
erl -setcookie mega -sname flassy -s mnesia \
    -noshell -noinput -eval '
  "flassy@" ++ Domain = atom_to_list (node ()),
  Survivors = [ list_to_atom (Name ++ Domain) || Name <- [ "turgy@", "warezy@" ] ],
  [ pong = net_adm:ping (Peer) || Peer <- Survivors ],
  { ok, _ } = mnesia:change_config (extra_db_nodes, nodes ()),
  Self = node (),
  { atomic, ok } = mnesia:change_table_copy_type (schema, Self, disc_copies),
  { atomic, ok } = mnesia:add_table_copy (flass, Self, ram_copies),
  mnesia:system_info ()
' -s erlang halt
# Teardown: ask every pool node to halt (results are ignored, so a node that
# is already gone is not an error) and remove all Mnesia directories.
erl -setcookie mega -sname killah \
    -noshell -noinput -eval '
  "killah@" ++ Domain = atom_to_list (node ()),
  [ rpc:call (list_to_atom (Name ++ Domain), erlang, halt, [])
    || Name <- [ "flassy@", "turgy@", "warezy@" ] ]
' -s erlang halt
for node in flassy turgy warezy
do
  rm -rf Mnesia*"$node"*
done
-------------- next part --------------
#! /bin/sh
# test-disaster-two: simulate losing, and then recovering, two EC2 nodes
# out of a three-node Mnesia pool (flassy, turgy, warezy) at the same time.
#
# Recovery is attempted with the same procedure as for a single node:
#   1. once a node has died, a survivor calls
#      mnesia:del_table_copy (schema, LostNode)
#   2. the Mnesia directory of the lost node is deleted (on EC2 this happens
#      automatically, as there is no persistent disk)
#   3. the node is restarted
#   4. mnesia:add_table_copy is called for the recovered node to reinstall
#      its tables
#
# Unfortunately this does not work: with two nodes gone simultaneously,
# step 1. is refused with
# "All replicas on diskfull nodes are not active yet"
#
# Begin from a clean slate: drop any Mnesia directory left by an earlier run.
for node in flassy turgy warezy
do
  rm -rf Mnesia*"$node"*
done
# Bring up the pool one node at a time so they all end up sharing one schema.
#
# flassy goes first: it registers its eval process as hello, turns its schema
# into a disc copy, answers a single ruthere probe with imok and then idles
# forever in the background.
erl -setcookie mega -sname flassy -s mnesia \
    -noshell -noinput -eval '
  Self = node (),
  true = erlang:register (hello, self ()),
  { atomic, ok } = mnesia:change_table_copy_type (schema, Self, disc_copies),
  receive
    { Caller, ruthere } -> Caller ! imok
  end,
  receive
  after infinity -> ok
  end
' -s erlang halt &
# wazzup is a throwaway node used as a barrier: it waits one second, pings
# flassy, looks up the process registered as hello there and blocks until that
# process replies imok (which flassy only does after its schema change).
erl -setcookie mega -sname wazzup \
    -noshell -noinput -eval '
  ok = timer:sleep (1000),
  "wazzup@" ++ Domain = atom_to_list (node ()),
  Flassy = list_to_atom ("flassy@" ++ Domain),
  pong = net_adm:ping (Flassy),
  Hello = rpc:call (Flassy, erlang, whereis, [ hello ]),
  Hello ! { self (), ruthere },
  receive
    imok -> ok
  end
' -s erlang halt
# turgy joins next: it connects to flassy, hands every connected node to
# Mnesia as extra_db_nodes, makes its own schema a disc copy, then answers one
# ruthere probe and idles forever in the background.
erl -setcookie mega -sname turgy -s mnesia \
    -noshell -noinput -eval '
  true = erlang:register (hello, self ()),
  "turgy@" ++ Domain = atom_to_list (node ()),
  Flassy = list_to_atom ("flassy@" ++ Domain),
  pong = net_adm:ping (Flassy),
  { ok, _ } = mnesia:change_config (extra_db_nodes, nodes ()),
  { atomic, ok } = mnesia:change_table_copy_type (schema, node (), disc_copies),
  receive
    { Caller, ruthere } -> Caller ! imok
  end,
  receive
  after infinity -> ok
  end
' -s erlang halt &
# Barrier: block the script until the hello process on turgy answers imok.
erl -setcookie mega -sname wazzup \
    -noshell -noinput -eval '
  ok = timer:sleep (1000),
  "wazzup@" ++ Domain = atom_to_list (node ()),
  Turgy = list_to_atom ("turgy@" ++ Domain),
  pong = net_adm:ping (Turgy),
  Hello = rpc:call (Turgy, erlang, whereis, [ hello ]),
  Hello ! { self (), ruthere },
  receive
    imok -> ok
  end
' -s erlang halt
# warezy joins last: it connects to both flassy and turgy, joins the shared
# schema as a disc copy and creates the flass table with a disc copy on all
# three nodes, then answers one ruthere probe and idles in the background.
erl -setcookie mega -sname warezy -s mnesia \
    -noshell -noinput -eval '
  true = erlang:register (hello, self ()),
  "warezy@" ++ Domain = atom_to_list (node ()),
  Pool = [ list_to_atom (Name ++ Domain)
           || Name <- [ "flassy@", "turgy@", "warezy@" ] ],
  [ Flassy, Turgy, _Warezy ] = Pool,
  pong = net_adm:ping (Flassy),
  pong = net_adm:ping (Turgy),
  { ok, _ } = mnesia:change_config (extra_db_nodes, nodes ()),
  { atomic, ok } = mnesia:change_table_copy_type (schema, node (), disc_copies),
  { atomic, ok } = mnesia:create_table (flass, [ { disc_copies, Pool } ]),
  receive
    { Caller, ruthere } -> Caller ! imok
  end,
  receive
  after infinity -> ok
  end
' -s erlang halt &
# Barrier: block the script until the hello process on warezy answers imok,
# which it only does once the flass table has been created.
erl -setcookie mega -sname wazzup \
    -noshell -noinput -eval '
  ok = timer:sleep (1000),
  "wazzup@" ++ Domain = atom_to_list (node ()),
  Warezy = list_to_atom ("warezy@" ++ Domain),
  pong = net_adm:ping (Warezy),
  Hello = rpc:call (Warezy, erlang, whereis, [ hello ]),
  Hello ! { self (), ruthere },
  receive
    imok -> ok
  end
' -s erlang halt
# Simulate the double disaster: halt flassy and turgy, then have warezy (the
# only survivor) try to remove both schema copies. The results of the halt
# calls are ignored. According to the author this is the step that fails:
# Mnesia refuses with "All replicas on diskfull nodes are not active yet",
# so the assertive match on { atomic, ok } is where the procedure breaks.
erl -setcookie mega -sname killah \
    -noshell -noinput -eval '
  "killah@" ++ Domain = atom_to_list (node ()),
  Flassy = list_to_atom ("flassy@" ++ Domain),
  Turgy = list_to_atom ("turgy@" ++ Domain),
  _ = rpc:call (Flassy, erlang, halt, []),
  _ = rpc:call (Turgy, erlang, halt, []),
  Warezy = list_to_atom ("warezy@" ++ Domain),
  { atomic, ok } = rpc:call (Warezy, mnesia, del_table_copy, [ schema, Flassy ]),
  { atomic, ok } = rpc:call (Warezy, mnesia, del_table_copy, [ schema, Turgy ])
' -s erlang halt
# Both lost instances take their disks with them: wipe their Mnesia directories.
for node in flassy turgy
do
  rm -rf Mnesia*"$node"*
done
# now restart turgy
#
# turgy comes back with an empty Mnesia directory and has to rejoin the
# cluster through a node that is actually alive. flassy was halted above and
# is only restarted further down, so the only reachable peer at this point is
# warezy. (Pinging flassy here, as the initial start-up does, would return
# pang and crash the assertive match before Mnesia is ever reconfigured, which
# in turn would make the wazzup barrier below fail.)
#
# After connecting, turgy hands the connected nodes to Mnesia as
# extra_db_nodes, turns its schema into a disc copy, answers one ruthere probe
# with imok and then idles forever in the background.
#
# NOTE(review): unlike the flassy restart, no mnesia:add_table_copy is issued
# for turgy here - confirm whether the flass table should be re-added too.
erl -setcookie mega -sname turgy -s mnesia -noshell -noinput -eval '
true = register (hello, self ()),
"turgy@" ++ Host = atom_to_list (node ()),
pong = net_adm:ping (list_to_atom ("warezy@" ++ Host)),
{ ok, _ } = mnesia:change_config (extra_db_nodes, erlang:nodes ()),
{ atomic, ok } = mnesia:change_table_copy_type (schema,
node (),
disc_copies),
receive { From, ruthere } -> From ! imok end,
receive after infinity -> ok end
' -s erlang halt &
# Barrier: wait one second, then block until the hello process on the
# restarted turgy answers imok, i.e. until its schema change has completed.
erl -setcookie mega -sname wazzup -noshell -noinput -eval '
receive after 1000 -> ok end,
"wazzup@" ++ Host = atom_to_list (node ()),
pong = net_adm:ping (list_to_atom ("turgy@" ++ Host)),
Pid = rpc:call (list_to_atom ("turgy@" ++ Host), erlang, whereis, [ hello ]),
Pid ! { self (), ruthere },
receive imok -> ok end
' -s erlang halt
# Bring flassy back with an empty Mnesia directory: reconnect it to turgy and
# warezy, rejoin the schema as a disc copy and re-add the flass table (as
# ram_copies). mnesia:system_info () is the final call before the node halts,
# presumably to dump the resulting state.
erl -setcookie mega -sname flassy -s mnesia \
    -noshell -noinput -eval '
  "flassy@" ++ Domain = atom_to_list (node ()),
  Peers = [ list_to_atom (Name ++ Domain) || Name <- [ "turgy@", "warezy@" ] ],
  [ pong = net_adm:ping (Peer) || Peer <- Peers ],
  { ok, _ } = mnesia:change_config (extra_db_nodes, nodes ()),
  Self = node (),
  { atomic, ok } = mnesia:change_table_copy_type (schema, Self, disc_copies),
  { atomic, ok } = mnesia:add_table_copy (flass, Self, ram_copies),
  mnesia:system_info ()
' -s erlang halt
# Teardown: ask every pool node to halt (results are ignored, so a node that
# is already gone is not an error) and remove all Mnesia directories.
erl -setcookie mega -sname killah \
    -noshell -noinput -eval '
  "killah@" ++ Domain = atom_to_list (node ()),
  [ rpc:call (list_to_atom (Name ++ Domain), erlang, halt, [])
    || Name <- [ "flassy@", "turgy@", "warezy@" ] ]
' -s erlang halt
for node in flassy turgy warezy
do
  rm -rf Mnesia*"$node"*
done
More information about the erlang-questions
mailing list