[erlang-questions] Pattern-matching function?

Chris Newcombe chris.newcombe@REDACTED
Thu Oct 30 22:23:09 CET 2008


> make_match_fun(Pstr) -> element(2, erl_eval:expr(hd(element(2,
> erl_parse:parse_exprs(element(2,
> erl_scan:string(lists:flatten(io_lib:format("fun (~s=__P) ->
> {matched,__P}; (_) -> fail end.", [Pstr]))))))), erl_eval:new_bindings())).

Thanks -- that's very useful :)

>  - Generate the code as text in memory, call the scanner and parser, and
>   pass the result to compile:forms/2. You can't use the preprocessor
>   this way (it only works on files), but you don't have to go to disk,
>   and you don't need to learn about the syntax tree representation.

I know that isn't a good technique for the particular problem being
discussed here, but I wanted to point out that there is a useful
module to do this here:

   http://www.erlang.org/pipermail/erlang-questions/2007-July/028023.html

Mats Cronqvist wrote it, and I extended it a little bit (slightly more
macro support, and support for -include).

Sometimes you do want to compile a new version of an existing module on the fly.
e.g. I use it to generate a pattern-matching function to efficiently
control dynamic debug/logging levels.
(This is an enhancement of an idea from Edwin Fine -- see the end of
his post http://www.erlang.org/pipermail/erlang-questions/2008-October/038674.html
-- thanks Edwin :)

Example code is below.

(This also uses Matthias Lang's excellent and indispensable
trunc_io.erl module: see
http://www.erlang.org/pipermail/erlang-questions/2003-February/007571.html
.  That post from Matthias also discusses use of tracing to control
debug/logging levels, but I prefer the following approach.)


%%% File    : is_debug_enabled.erl
%%%
%%% See also debug.hrl (for the code controlled by the function in this module)
%%%
%%% This module contains a single exported function which efficiently
%%% controls the level of debug logging (and debug code execution) in
%%% the rest of the application.
%%%
%%% To change the logging level we generate, compile and load a new
%%% version of this module.
%%%
%%% By default the only debug that is enabled is everything at level
%%% ?DBG_ALWAYS.
%%%
%%% By 'efficient' we mean
%%%
%%%   - each invocation of the ?DBG macro makes a single
%%%     fully-qualified function call to a tiny constant-time function
%%%     -- for/3 in this module.
%%%
%%%   - the other arguments (logging) to ?DBG or ?DBG_CODE are
%%%     only evaluated if for/3 returns true (although any constants
%%%     in those arguments will already be compiled into the constant-table
%%%     for the module containing the ?DBG instance, that seems unavoidable).
%%%
%%% Runtime control over a global cross-module system is actually a
%%% fairly difficult problem for Erlang as there are no global or
%%% module-level variables.  Other potential solutions include
%%%
%%%   - 'tracing a do-nothing function' (see
%%%     http://www.erlang.org/pipermail/erlang-questions/2003-February/007571.html)
%%%   - parameterized modules (passing the logger funs as parameters;
%%%     but requires code changes)
%%%   - opening a port to a linked-in driver which returns the value
%%%
%%% We use a function call to decide if a ?DBG instance is active.
%%% This allows us to change the level of debug logging/activity by
%%% compiling and hot-loading a new version of the module that
%%% contains the decision-function.  Users simply change the
%%% definition of the 'for' function to return true for the subset
%%% of debug statements that they want to enable (and continue to
%%% return false for everything else).
%%%
%%% It is very easy to dynamically change the code, by using
%%% compile_module_from_string.erl
%%%
%%% Currently we can control execution of debug code by any
%%% combination of level and source-code location.  (Ideally we would
%%% also pattern match on CurrentFunction here, but it's a bit too
%%% expensive for every debug statement to call process_info(self(),
%%% current_function) when deciding if it is active.)
%%%

%%% See debug.hrl for macros that use this.

-module(is_debug_enabled).

-export([for/3]).

-include("debug.hrl").   % for DBG_WARN, etc


%% Decides whether a ?DBG / ?DBG_CODE instance at the given importance
%% level, module and line is active.
%%
%% Important: the last clause must ALWAYS be a catch-all that returns
%% false.  The macros that call this function do NOT wrap the call in
%% an exception handler, so a missing clause would crash the caller.
%%
%% Default policy: only level ?DBG_ALWAYS is enabled, whatever the
%% module or line.

for(Level, _Mod, _LineNo) when Level =:= ?DBG_ALWAYS -> true;
for(_Level, _Mod, _LineNo) -> false.


%%% File    : debug.hrl
%%%
%%% See also module  is_debug_enabled.erl

%% Debug levels; numeric so that is_debug_enabled:for() can efficiently
%% tell if a given level is included or excluded by a cut-off threshold.
%% (we want cut-off thresholds to be transitive).
%%
%% e.g.
%%
%%  '(Importance, OptionalModule, _) when Importance >= ?DBG_ERROR' => 'true'
%%
%% will enable ?DBG() statements with levels ERROR, CRITICAL, or ALWAYS
%%

%% Importance levels in ascending numeric order.  A guard such as
%% `Importance >= ?DBG_WARN' therefore selects that level and every level
%% listed after it.
-define(DBG_NEVER,     00).   % lowest value; below every other level
-define(DBG_VERBOSE,   50).
-define(DBG_INFO,      60).
-define(DBG_WARN,      70).
-define(DBG_ERROR,     80).
-define(DBG_CRITICAL,  90).
-define(DBG_ALWAYS,   100).   % highest value; the only level enabled by the default for/3

%% Shorthand for ?DBG_MAXLEN with a fixed MaxLength argument of 5000.
-define(DBG(Importance, Term),  ?DBG_MAXLEN(Importance, Term, 5000)).

%% Log Term through debug:logger/2 if is_debug_enabled:for/3 returns true
%% for (Importance, ?MODULE, ?LINE) at the call site; otherwise evaluate
%% to ok.  Term and MaxLength are only evaluated in the enabled branch;
%% both are handed to trunc_io:safe_print/2.
%%
%% This does not bind any variables, so we don't put the body in an
%% anonymous fun.
-define(DBG_MAXLEN(Importance, Term, MaxLength),
        case is_debug_enabled:for(Importance, ?MODULE, ?LINE) of
            false     -> ok;
            true      ->
                debug:logger(
                  "~w:~w: ~s~n",
                  [%% This gets the {module,Name,Arity} of the current function
                   %% (?MODULE would be faster and less verbose)
                   element(2, process_info(self(), current_function)),
                   ?LINE,
                   trunc_io:safe_print(Term, MaxLength)])
        end).

%% In order to avoid polluting the calling function (leaving bound
%% variables behind)
%% or being polluted by the calling function (unwanted variable bindings),
%% we execute the code in an anonymous fun, which is the recommended way to
%% get a clean scope in Erlang.
%%
%% This *will* allow exceptions to escape from Code into the caller function
%% that's a feature.
%%
-define(DBG_CODE(Importance, Code),
        %% Code is only evaluated when is_debug_enabled:for/3 returns true
        %% for (Importance, ?MODULE, ?LINE) at the call site.
        case is_debug_enabled:for(Importance, ?MODULE, ?LINE) of
            false -> ok;
            %% Code runs as the body of an immediately-applied fun, so
            %% variables it binds do not remain bound in the caller.
            true  -> (fun() -> Code end)()
        end).


%%% File    : debug.erl

%% See comments in
%%
%%   debug.hrl
%%   is_debug_enabled.erl
%%%

-module(debug).

-export([logger/2,
         validate_is_debug_enabled_clauses/1,
         set_is_debug_enabled/1,
         trunc_process_info_str/2,
         ordered_process_info/1]).


-include("debug.hrl").


%% For EUnit's debugFmt macro (otherwise logging does not appear
%% during EUnit tests)
-include_lib("eunit/include/eunit.hrl").


%% Emit one debug line through EUnit's ?debugFmt macro.
%%
%% The caller's FormatStr/Args are prefixed with an identification of the
%% calling process and a now() timestamp:
%%   - registered process:                 "<name>: <now>: ..."
%%   - unregistered, no '$ancestors' key:  "<pid>: <now>: ..."
%%   - unregistered, '$ancestors' present: "<pid> (child of <parent>): <now>: ..."
logger(FormatStr, Args) ->
    log_with_origin(process_info(self(), registered_name), FormatStr, Args).

%% Dispatch on the result of process_info(self(), registered_name).
log_with_origin({registered_name, ProcessName}, FormatStr, Args) ->
    ?debugFmt("~w: ~w: " ++ FormatStr, [ProcessName, now() | Args]);
log_with_origin([], FormatStr, Args) ->
    %% proc_lib:spawn sets '$ancestors' in the process dictionary
    log_unregistered(get('$ancestors'), FormatStr, Args).

%% Unregistered process: mention the first ancestor when one is recorded.
log_unregistered(undefined, FormatStr, Args) ->
    ?debugFmt("~w: ~w: " ++ FormatStr, [self(), now() | Args]);
log_unregistered(Ancestors, FormatStr, Args) ->
    ?debugFmt("~w (child of ~p): ~w: " ++ FormatStr,
              [self(), hd(Ancestors), now() | Args]).


%% Validate a list of {PatternStr, ValueBin} clause specifications before
%% they are turned into source code for is_debug_enabled:for/3.
%%
%%   - ValueBin must be exactly <<"true">> or <<"false">>; anything else
%%     fails with a function_clause error from the fun below.
%%   - PatternStr (any iodata) must look like a parenthesised
%%     three-argument head, optionally followed by more text (e.g. a guard):
%%         "(Importance, _Module, _Line) when Importance >= ?DBG_ERROR"
%%     otherwise {bad_pattern, {nomatch, PatternStr}} is thrown.
%%
%% Returns ok when every clause passes.
%%
%% There's not much value in being pedantic in the pattern regex, as it's
%% not a proper grammar and compiling the generated module is the proper
%% test.  The main purpose here is to stop arbitrary code being injected
%% when we compile the new version of the module.
%%
%% NOTE(review): the trailing ".*" accepts any text after the closing
%% parenthesis, so this check alone does not rule out a "->" being
%% smuggled in after the head -- consider tightening if Clauses can come
%% from an untrusted source.
validate_is_debug_enabled_clauses(Clauses) ->

    %% The backslashes are doubled so that the regex engine sees "\(" and
    %% "\)", i.e. literal parentheses.  In an Erlang string a single "\("
    %% is just "(", which turned the parentheses into a regex group and
    %% meant they were never required in the input.
    {ok, PatternRe} = re:compile("^\\([^,]+, *[^,]+, *[^,]+\\).*$"),

    lists:foreach(fun({PatternStr, <<"true">>}) ->
                          validate_clause_pattern(PatternStr, PatternRe);
                     ({PatternStr, <<"false">>}) ->
                          validate_clause_pattern(PatternStr, PatternRe)
                  end,
                  Clauses).

%% Check one pattern string against the compiled head regex.
%% Returns ok, or throws {bad_pattern, {Error, PatternStr}} where Error is
%% whatever re:run/3 returned instead of a match.
validate_clause_pattern(PatternStr, PatternRe) ->

    %% {capture, none}: we only need to know whether it matched.
    case re:run(PatternStr, PatternRe, [{capture, none}]) of
        match -> ok;
        Error -> throw({bad_pattern, {Error, PatternStr}})
    end.


%% Generate, compile and hot-load a new version of the is_debug_enabled
%% module whose for/3 clauses are built from Clauses.
%%
%% Clauses is a list of {PatternStr, ValueBin} pairs.  Each pair becomes
%% the source text "for<PatternStr> -> <ValueBin>;", so PatternStr must
%% include the parenthesised argument list (and may carry a guard).  A
%% final "for(_, _, _) -> false." clause is always appended.
%%
%% Returns ok.  Failure to locate the include directory, to compile the
%% generated source, or to load the resulting binary surfaces as a
%% badmatch error.
%%
%% NOTE(review): the clause text is spliced into source code verbatim and
%% this function does not itself call validate_is_debug_enabled_clauses/1;
%% presumably callers validate first -- confirm.
set_is_debug_enabled(Clauses) ->

    %% This involves generating new code for is_debug_enabled.erl
    %% See comments in that module for details.

    CodeStr = lists:flatten(
                ["-module(is_debug_enabled).\n",
                 "-export([for/3]).\n",
                 "-include(\"debug.hrl\").\n",
                 %% The default "for(?DBG_ALWAYS, _Module, _Line) -> true;"
                 %% clause is not emitted here, as in an emergency we
                 %% might want to remove it.  Pass it in Clauses if wanted.
                 lists:map(fun({PatternStr, ValueBin}) ->
                                   ValueStr = binary_to_list(ValueBin),
                                   io_lib:format("for~s -> ~s;\n",
                                                 [PatternStr, ValueStr])
                           end,
                           Clauses),
                 %% The 'for' function must ALWAYS have a catch-all
                 %% clause that returns false.
                 "for(_, _, _) -> false.\n"
                ]),

    %% The generated module needs to include debug.hrl.
    %% We assume it is in the same utilities library as
    %% compile_module_from_string, and construct the include path from
    %% that.

    %% First we have to ensure that the module is loaded, otherwise
    %% is_loaded won't be able to report its path.  (This should only be
    %% necessary in development, as the boot script should proactively
    %% load all code at startup.)

    code:ensure_loaded(compile_module_from_string),

    {file, AbsBeamPath}
        = code:is_loaded(compile_module_from_string),

    %% NOTE(review): ?GU is not defined in this file or in the debug.hrl
    %% shown alongside it; presumably a macro naming a general-utilities
    %% module whose split_string_on_delimiter/2 returns {Before, After}
    %% -- confirm.
    {LibDir, _BeamFileName} =
        ?GU:split_string_on_delimiter("/ebin/", AbsBeamPath),
    IncludePathOptions = [{i, LibDir ++ "/include"}],

    {is_debug_enabled, CompiledCodeBin} =
        compile_module_from_string:compile(
          CodeStr,
          [return_errors, return_warnings, verbose]
          ++ IncludePathOptions),

    ?DBG(?DBG_INFO, ["Successfully compiled new version of is_debug_enabled"]),

    %% We have to purge the 'old' version of the module, so that
    %% code:load_binary can move the current version to the 'old' version
    %% before it installs the new version.  The first time through there
    %% isn't an old version to purge, so we don't check the result.

    code:purge(is_debug_enabled),

    {module, is_debug_enabled} =
        code:load_binary(is_debug_enabled,
                         %% Tag identifying the 'source' of the module
                         %% (normally its .erl filename).
                         compiled_by__set_is_debug_enabled,
                         CompiledCodeBin),

    ?DBG(?DBG_INFO, ["Successfully loaded new version of is_debug_enabled: ",
                     CodeStr]),

    ok.


%% Gather ordered_process_info(Pid) and pass it, together with MaxLen, to
%% trunc_io:safe_print/2; the result of that call is returned unchanged.
trunc_process_info_str(Pid, MaxLen) ->
    ProcessInfo = ordered_process_info(Pid),
    trunc_io:safe_print(ProcessInfo, MaxLen).


%% Collect diagnostic information about Pid as {GProcData, ProcInfoData}.
%%
%% Note: process_info/1 is documented as 'use only for debugging'.
%% A bigger issue is that the result is not ordered, but we really
%% want the potentially large items (message_queue, dictionary) at the end.
%%
%% So we use the approved process_info(Pid, Tags) for the
%% interesting tags, and put the potentially large items at the end.
%%
%% ProcInfoData is whatever process_info/2 returns for the tag list below.
%%
%% GProcData is the {gproc, KeysValues} tuple returned by
%% gproc:info(Pid, gproc), or the atom failed_to_get_gproc_data if that
%% call raises or returns anything else.
%%
ordered_process_info(Pid) ->

    %% The list of tags (and the notes on each) is from the R12B-2
    %% documentation.
    InfoSpec =
        [%% Atom is the registered name of the process.
         registered_name,
         %% Module, Function, Arity is the initial function call with
         %% which the process was spawned.
         initial_call,
         %% Module, Function, Args is the current function call of the
         %% process.
         current_function,
         trap_exit,
         %% Status is the status of the process.  Status is waiting
         %% (waiting for a message), running, runnable (ready to run, but
         %% another process is running), or suspended (suspended on a
         %% "busy" port or by the erlang:suspend_process/[1,2] BIF).
         status,
         %% Size is the size in bytes of the process.  This includes call
         %% stack, heap and internal structures.
         memory,
         %% Size is the stack size of the process in words.
         stack_size,
         %% Size is the size in words of youngest heap generation of the
         %% process.  This generation currently include the stack of the
         %% process.  This information is highly implementation
         %% dependent, and may change if the implementation change.
         heap_size,
         %% Size is the total size in words of all heap fragments of the
         %% process.  This currently include the stack of the process.
         total_heap_size,
         %% MessageQueueLen is the number of messages currently in the
         %% message queue of the process.  This is the length of the list
         %% MessageQueue returned as the info item messages (see below).
         message_queue_len,
         %% GCInfo is a list which contains miscellaneous information
         %% about garbage collection for this process.  The content of
         %% GCInfo may be changed without prior notice.
         garbage_collection,
         %% BinInfo is a list containing miscellaneous information about
         %% binaries currently being referred to by this process.  This
         %% InfoTuple may be changed or removed without prior notice.
         binary,
         %% Not requested: message_binary -- BinInfo is a list containing
         %% miscellaneous information about binaries currently being
         %% referred to by the message area.  This InfoTuple is only
         %% valid on an emulator using the hybrid heap type.  This
         %% InfoTuple may be changed or removed without prior notice.

         %% Pids is a list of pids, with processes to which the process
         %% has a link.
         links,
         %% A list of pids that are monitoring the process (with
         %% erlang:monitor/2).
         monitored_by,
         %% A list of monitors (started by erlang:monitor/2) that are
         %% active for the process.  For a local process monitor or a
         %% remote process monitor by pid, the list item is
         %% {process, Pid}, and for a remote process monitor by name, the
         %% list item is {process, {RegName, Node}}.
         monitors,
         %% Level is the current priority level for the process.  For
         %% more information on priorities see
         %% process_flag(priority, Level).
         priority,
         %% Number is the number of reductions executed by the process.
         reductions,
         %% GroupLeader is group leader for the IO of the process.
         group_leader,
         %% Not requested: error_handler -- Module is the error handler
         %% module used by the process (for undefined function calls, for
         %% example).
         %% Not requested: sequential_trace_token -- SequentialTraceToken
         %% the sequential trace token for the process.  This InfoTuple
         %% may be changed or removed without prior notice.

         %% SuspendeeList is a list of {Suspendee, ActiveSuspendCount,
         %% OutstandingSuspendCount} tuples.  Suspendee is the pid of a
         %% process that have been or is to be suspended by the process
         %% identified by Pid via the erlang:suspend_process/2 BIF, or
         %% the erlang:suspend_process/1 BIF.  ActiveSuspendCount is the
         %% number of times the Suspendee has been suspended by Pid.
         %% OutstandingSuspendCount is the number of not yet completed
         %% suspend requests sent by Pid.  That is, if
         %% ActiveSuspendCount /= 0, Suspendee is currently in the
         %% suspended state, and if OutstandingSuspendCount /= 0 the
         %% asynchronous option of erlang:suspend_process/2 has been used
         %% and the suspendee has not yet been suspended by Pid.  Note
         %% that the ActiveSuspendCount and OutstandingSuspendCount are
         %% not the total suspend count on Suspendee, only the parts
         %% contributed by Pid.
         suspending,
         %% InternalTraceFlags is an integer representing internal trace
         %% flag for this process.  This InfoTuple may be changed or
         %% removed without prior notice.
         trace,
         %% Not requested: catchlevel -- CatchLevel is the number of
         %% currently active catches in this process.  This InfoTuple may
         %% be changed or removed without prior notice.

         %% ---- Potentially large items, kept at the end ----

         %% The binary Bin contains the same information as the output
         %% from erlang:process_display(Pid, backtrace).  Use
         %% binary_to_list/1 to obtain the string of characters from the
         %% binary.
         backtrace,
         %% Dictionary is the dictionary of the process.
         dictionary,
         %% MessageQueue is a list of the messages to the process, which
         %% have not yet been processed.
         messages,
         %% The value is false if call saving is not active for the
         %% process (see process_flag/3).  If call saving is active, a
         %% list is returned, in which the last element is the most
         %% recent called.
         last_calls
        ],

    ProcInfoData = process_info(Pid, InfoSpec),

    %% Best effort: any exception from gproc:info/2, or any unexpected
    %% return value, is reported as failed_to_get_gproc_data rather than
    %% failing the whole call.
    GProcData =
        try gproc:info(Pid, gproc) of
            {gproc, _KeysValues} = Data ->
                Data;
            _Other ->
                failed_to_get_gproc_data
        catch
            _Class:_Reason ->
                failed_to_get_gproc_data
        end,

    {GProcData, ProcInfoData}.



More information about the erlang-questions mailing list