[erlang-questions] Unescape HTML text

Zabrane Mickael <>
Tue Sep 25 20:48:15 CEST 2012


answering my own question:

%% @doc Replace the HTML character entities known to unescape/2 in `Text'.
%%
%% `Text' is either a binary or a list accepted by list_to_binary/1.
%% The result is always a binary; empty input (`<<>>' or `[]') gives `<<>>'.
%% Any other argument type fails with `function_clause'.
unescape(Text) when is_binary(Text) ->
    unescape(Text, <<>>);
unescape(Text) when is_list(Text) ->
    %% Normalise to a binary first so a single scanner handles both inputs.
    unescape(list_to_binary(Text), <<>>).

%% Scan `Bin' left to right, replacing each recognised HTML character
%% entity with its character and appending the result to `Acc'.
%%
%% Bin - remaining input (binary).
%% Acc - output built so far (binary); returned once Bin is exhausted.
%%
%% Replacement characters are written as integer code points with the
%% `utf8' segment type, so the output is valid UTF-8 no matter how this
%% source file is encoded, and code points above 255 (the euro sign) are
%% not truncated to a single byte.
%% Unrecognised entities and all other bytes are copied through unchanged.
unescape(<<>>, Acc) ->
    Acc;
%% NOTE(review): a non-breaking space is emitted as a plain space, matching
%% the output shown in the original post - confirm U+00A0 is not wanted.
unescape(<<"&nbsp;", T/binary>>, Acc) ->
    unescape(T, <<Acc/binary, $\s>>);
unescape(<<"&amp;", T/binary>>, Acc) ->
    unescape(T, <<Acc/binary, $&>>);
unescape(<<"&quot;", T/binary>>, Acc) ->
    unescape(T, <<Acc/binary, $">>);
%% Both the numeric and the named form of the apostrophe are accepted.
unescape(<<"&#39;", T/binary>>, Acc) ->
    unescape(T, <<Acc/binary, $'>>);
unescape(<<"&apos;", T/binary>>, Acc) ->
    unescape(T, <<Acc/binary, $'>>);
unescape(<<"&lt;", T/binary>>, Acc) ->
    unescape(T, <<Acc/binary, $<>>);
unescape(<<"&gt;", T/binary>>, Acc) ->
    unescape(T, <<Acc/binary, $>>>);
unescape(<<"&euro;", T/binary>>, Acc) ->
    unescape(T, <<Acc/binary, 16#20AC/utf8>>);  % euro sign
unescape(<<"&ccedil;", T/binary>>, Acc) ->
    unescape(T, <<Acc/binary, 16#E7/utf8>>);    % c with cedilla
unescape(<<"&agrave;", T/binary>>, Acc) ->
    unescape(T, <<Acc/binary, 16#E0/utf8>>);    % a with grave
unescape(<<"&acirc;", T/binary>>, Acc) ->
    unescape(T, <<Acc/binary, 16#E2/utf8>>);    % a with circumflex
unescape(<<"&auml;", T/binary>>, Acc) ->
    unescape(T, <<Acc/binary, 16#E4/utf8>>);    % a with diaeresis
unescape(<<"&aelig;", T/binary>>, Acc) ->
    unescape(T, <<Acc/binary, 16#E6/utf8>>);    % ae ligature
unescape(<<"&egrave;", T/binary>>, Acc) ->
    unescape(T, <<Acc/binary, 16#E8/utf8>>);    % e with grave
unescape(<<"&eacute;", T/binary>>, Acc) ->
    unescape(T, <<Acc/binary, 16#E9/utf8>>);    % e with acute
unescape(<<"&ecirc;", T/binary>>, Acc) ->
    unescape(T, <<Acc/binary, 16#EA/utf8>>);    % e with circumflex
unescape(<<"&euml;", T/binary>>, Acc) ->
    unescape(T, <<Acc/binary, 16#EB/utf8>>);    % e with diaeresis
unescape(<<"&icirc;", T/binary>>, Acc) ->
    unescape(T, <<Acc/binary, 16#EE/utf8>>);    % i with circumflex
unescape(<<"&iuml;", T/binary>>, Acc) ->
    unescape(T, <<Acc/binary, 16#EF/utf8>>);    % i with diaeresis
unescape(<<"&ouml;", T/binary>>, Acc) ->
    unescape(T, <<Acc/binary, 16#F6/utf8>>);    % o with diaeresis
unescape(<<"&ugrave;", T/binary>>, Acc) ->
    unescape(T, <<Acc/binary, 16#F9/utf8>>);    % u with grave
unescape(<<"&uacute;", T/binary>>, Acc) ->
    unescape(T, <<Acc/binary, 16#FA/utf8>>);    % u with acute
unescape(<<"&ucirc;", T/binary>>, Acc) ->
    unescape(T, <<Acc/binary, 16#FB/utf8>>);    % u with circumflex
unescape(<<"&uuml;", T/binary>>, Acc) ->
    unescape(T, <<Acc/binary, 16#FC/utf8>>);    % u with diaeresis
%% Anything else, including a bare "&" or an unknown entity, is copied
%% one byte at a time.
unescape(<<C, T/binary>>, Acc) ->
    unescape(T, <<Acc/binary, C>>).


Regards,
Zabrane

On Sep 25, 2012, at 8:02 PM, Zabrane Mickael wrote:

> Hi guys,
> 
> I want to convert HTML-escaped text (see http://www.w3schools.com/tags/ref_entities.asp) like this:
> 
> bourg&eacute; Cop
> 
> to:
> 
> bourgé Cop
> 
> Is there any Erlang library for this?
> 
> Regards,
> Zabrane
> 



-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://erlang.org/pipermail/erlang-questions/attachments/20120925/207ec7fd/attachment.html>


More information about the erlang-questions mailing list