%%% Convert Erlang mailing list HTML archives to mbox format
%%% Use 'wget' to fetch the raw archives

-module(ml2mb).

-export([go/0, message/1]).

%% @doc Entry point placeholder: currently does nothing and returns `ok'.
-spec go() -> ok.
go() ->
    ok.

%% @doc Convert one archived message file into an mbox file.
%%
%% Reads the header section and then the body section from `Name',
%% strips markup from the header, prepends a fixed mbox "From " separator
%% line and writes the result to `Name ++ ".mbox"'.
%%
%% Returns whatever `file:write_file/2' returns. Crashes with `badmatch'
%% if `Name' cannot be opened for reading.
-spec message(file:filename()) -> ok | {error, term()}.
message(Name) ->
    {ok, F} = file:open(Name, [read]),
    %% Always release the file descriptor, even if parsing crashes.
    {Header, Body} =
        try
            %% Order matters: both calls consume lines from the same handle.
            H = header(F),
            B = body(F),
            {H, B}
        after
            file:close(F)
        end,
    MBox_start = "From bogus@mail.address Sat Jan 3 01:05:34 1996\n",
    Bin = iolist_to_binary([MBox_start, Header, Body]),
    file:write_file(Name ++ ".mbox", Bin).

%% Read the header section and remove all markup tags from it.
%% NOTE(review): the start/end markers are empty strings here, so the
%% section is delimited by blank lines. Presumably the original marker
%% text was lost at some point -- confirm against a real archive file.
header(File) ->
    Raw = between_tags(File, "", ""),
    dehtml(Raw).

%% Read the body section; the lines are returned unmodified.
%% NOTE(review): same empty start/end markers as in header/1 -- confirm.
body(File) ->
    between_tags(File, "", "").

%% Skip lines until one equals `Start' followed by a newline, then return
%% the list of lines up to (not including) the `End' line.
%% Returns "" if end of file is reached before `Start' is seen.
between_tags(File, Start, End) ->
    TS = Start ++ "\n",
    case io:get_line(File, "") of
        eof ->
            "";
        TS ->
            to_end_tag(File, End);
        %% Only skip real lines; an {error, _} result crashes here
        %% (case_clause) instead of recursing forever.
        Line when is_list(Line) ->
            between_tags(File, Start, End)
    end.

%% Collect lines until one equals `End' followed by a newline, or until
%% end of file. The terminating line itself is not included.
to_end_tag(File, End) ->
    TE = End ++ "\n",
    case io:get_line(File, "") of
        eof ->
            "";
        TE ->
            [];
        %% Guarded so that a read error is not silently added to the output.
        Line when is_list(Line) ->
            [Line | to_end_tag(File, End)]
    end.

%% Remove every `<...>' tag from `String' (any iodata) and return a flat
%% string. Uses `re' because the old `regexp' module is no longer in OTP.
dehtml(String) ->
    re:replace(String, "<[^>]+>", "", [global, {return, list}]).