regex_mod.c

Go to the documentation of this file.
00001 /*
00002  * $Id: regex_mod.c 5535 2009-01-28 23:38:38Z ibc_sf $
00003  *
00004  * regex module - pcre operations
00005  *
00006  * Copyright (C) 2008 Iñaki Baz Castillo
00007  *
00008  * This file is part of Kamailio, a free SIP server.
00009  *
00010  * Kamailio is free software; you can redistribute it and/or modify
00011  * it under the terms of the GNU General Public License as published by
00012  * the Free Software Foundation; either version 2 of the License, or
00013  * (at your option) any later version
00014  *
00015  * Kamailio is distributed in the hope that it will be useful,
00016  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00017  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018  * GNU General Public License for more details.
00019  *
00020  * You should have received a copy of the GNU General Public License 
00021  * along with this program; if not, write to the Free Software 
00022  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00023  *
00024  * History:
00025  * --------
00026  *  2009-01-14  initial version (Iñaki Baz Castillo)
00027  */
00028 
00029 
00030 /*!
00031  * \file
00032  * \brief REGEX :: Perl-compatible regular expressions using PCRE library
00033  * Copyright (C) 2008 Iñaki Baz Castillo
00034  * \ingroup regex
00035  */
00036 
00037 #include <stdio.h>
00038 #include <stdlib.h>
00039 #include <string.h>
00040 #include <sys/stat.h>
00041 #include <pcre.h>
00042 #include "../../sr_module.h"
00043 #include "../../dprint.h"
00044 #include "../../pt.h"
00045 #include "../../mem/shm_mem.h"
00046 #include "../../str.h"
00047 #include "../../locking.h"
00048 #include "../../mod_fix.h"
00049 #include "../../mi/mi.h"
00050 
00051 MODULE_VERSION
00052 
00053 #define START 0
00054 #define RELOAD 1
00055 
00056 #define FILE_MAX_LINE 500        /*!< Max line size in the file */
00057 #define MAX_GROUPS 20            /*!< Max number of groups */
00058 #define GROUP_MAX_SIZE 8192      /*!< Max size of a group */
00059 
00060 
00061 /*
00062  * Locking variables
00063  */
00064 gen_lock_t *reload_lock;
00065 
00066 
00067 /*
00068  * Module exported parameter variables
00069  */
00070 static char *file;
00071 static int max_groups            = MAX_GROUPS;
00072 static int group_max_size        = GROUP_MAX_SIZE;
00073 static int pcre_caseless         = 0;
00074 static int pcre_multiline        = 0;
00075 static int pcre_dotall           = 0;
00076 static int pcre_extended         = 0;
00077 
00078 
00079 /*
00080  * Module internal parameter variables
00081  */
00082 static pcre **pcres;
00083 static pcre ***pcres_addr;
00084 static int *num_pcres;
00085 static int pcre_options = 0x00000000;
00086 
00087 
/*
 * Module core functions
 */
static int  mod_init(void);
static void destroy(void);


/*
 * Module internal functions
 */
static int  load_pcres(int action);
static void free_shared_memory(void);


/*
 * Script functions
 */
static int w_pcre_match(struct sip_msg *_msg, char *_s1, char *_s2);
static int w_pcre_match_group(struct sip_msg *_msg, char *_s1, char *_s2);


/*
 * MI functions
 */
static struct mi_root *mi_pcres_reload(struct mi_root *cmd, void *param);
00113 
00114 
00115 /*
00116  * Exported functions
00117  */
00118 static cmd_export_t cmds[] =
00119 {
00120    { "pcre_match", (cmd_function)w_pcre_match, 2, fixup_spve_spve, 0,
00121       REQUEST_ROUTE|FAILURE_ROUTE|ONREPLY_ROUTE|BRANCH_ROUTE|LOCAL_ROUTE },
00122    { "pcre_match_group", (cmd_function)w_pcre_match_group, 2, fixup_spve_uint, 0,
00123       REQUEST_ROUTE|FAILURE_ROUTE|ONREPLY_ROUTE|BRANCH_ROUTE|LOCAL_ROUTE },
00124    { "pcre_match_group", (cmd_function)w_pcre_match_group, 1, fixup_spve_null, 0,
00125       REQUEST_ROUTE|FAILURE_ROUTE|ONREPLY_ROUTE|BRANCH_ROUTE|LOCAL_ROUTE },
00126    { 0, 0, 0, 0, 0, 0 }
00127 };
00128 
00129 
00130 /*
00131  * Exported parameters
00132  */
00133 static param_export_t params[] = {
00134    {"file",                STR_PARAM,  &file                },
00135    {"max_groups",          INT_PARAM,  &max_groups          },
00136    {"group_max_size",      INT_PARAM,  &group_max_size      },
00137    {"pcre_caseless",       INT_PARAM,  &pcre_caseless       },
00138    {"pcre_multiline",      INT_PARAM,  &pcre_multiline      },
00139    {"pcre_dotall",         INT_PARAM,  &pcre_dotall         },
00140    {"pcre_extended",       INT_PARAM,  &pcre_extended       },
00141    {0, 0, 0}
00142 };
00143 
00144 
00145 /*
00146  * Exported MI functions
00147  */
00148 static mi_export_t mi_cmds[] = {
00149    { "regex_reload", mi_pcres_reload, MI_NO_INPUT_FLAG, 0, 0 },
00150    { 0, 0, 0, 0 ,0 }
00151 };
00152 
00153 
00154 /*
00155  * Module interface
00156  */
00157 struct module_exports exports = {
00158    "regex",                   /*!< module name */
00159    DEFAULT_DLFLAGS,           /*!< dlopen flags */
00160    cmds,                      /*!< exported functions */
00161    params,                    /*!< exported parameters */
00162    0,                         /*!< exported statistics */
00163    mi_cmds,                   /*!< exported MI functions */
00164    0,                         /*!< exported pseudo-variables */
00165    0,                         /*!< extra processes */
00166    mod_init,                  /*!< module initialization function */
00167    (response_function) 0,     /*!< response handling function */
00168    destroy,                   /*!< destroy function */
00169    0                          /*!< per-child init function */
00170 };
00171 
00172 
00173 
00174 /*! \brief
00175  * Init module function
00176  */
00177 static int mod_init(void)
00178 {
00179    
00180    LM_INFO("initializing module...\n");
00181    
00182    /* Group matching feature */
00183    if (file == NULL) {
00184       LM_NOTICE("'file' parameter is not set, group matching disabled\n");
00185    } else {
00186       /* Create and init the lock */
00187       reload_lock = lock_alloc();
00188       if (reload_lock == NULL) {
00189          LM_ERR("cannot allocate reload_lock\n");
00190          goto err;
00191       }
00192       if (lock_init(reload_lock) == NULL) {
00193          LM_ERR("cannot init the reload_lock\n");
00194          lock_dealloc(reload_lock);
00195          goto err;
00196       }
00197       
00198       /* PCRE options */
00199       if (pcre_caseless != 0) {
00200          LM_DBG("PCRE CASELESS enabled\n");
00201          pcre_options = pcre_options | PCRE_CASELESS;
00202       }
00203       if (pcre_multiline != 0) {
00204          LM_DBG("PCRE MULTILINE enabled\n");
00205          pcre_options = pcre_options | PCRE_MULTILINE;
00206       }
00207       if (pcre_dotall != 0) {
00208          LM_DBG("PCRE DOTALL enabled\n");
00209          pcre_options = pcre_options | PCRE_DOTALL;
00210       }
00211       if (pcre_extended != 0) {
00212          LM_DBG("PCRE EXTENDED enabled\n");
00213          pcre_options = pcre_options | PCRE_EXTENDED;
00214       }
00215       LM_DBG("PCRE options: %i\n", pcre_options);
00216       
00217       /* Pointer to pcres */
00218       if ((pcres_addr = shm_malloc(sizeof(pcre **))) == 0) {
00219          LM_ERR("no memory for pcres_addr\n");
00220          goto err;
00221       }
00222       
00223       /* Integer containing the number of pcres */
00224       if ((num_pcres = shm_malloc(sizeof(int))) == 0) {
00225          LM_ERR("no memory for num_pcres\n");
00226          goto err;
00227       }
00228       
00229       /* Load the pcres */
00230       LM_NOTICE("loading pcres...\n");
00231       if (load_pcres(START)) {
00232          LM_CRIT("failed to load pcres\n");
00233          goto err;
00234       }
00235    }
00236    
00237    return 0;
00238    
00239 err:
00240    free_shared_memory();
00241    return -1;
00242 }
00243 
00244 
/*! \brief
 * Destroy module function: all the cleanup is delegated to
 * free_shared_memory()
 */
static void destroy(void)
{
   free_shared_memory();
}
00249 
00250 
00251 /*! \brief Convert the file content into regular expresions and store them in pcres */
00252 static int load_pcres(int action)
00253 {
00254    int i, j;
00255    FILE *f;
00256    char line[FILE_MAX_LINE];
00257    char **patterns = NULL;
00258    pcre *pcre_tmp = NULL;
00259    int pcre_size;
00260    int pcre_rc;
00261    const char *pcre_error;
00262    int pcre_erroffset;
00263    int num_pcres_tmp = 0;
00264    pcre **pcres_tmp = NULL;
00265    
00266    /* Get the lock */
00267    lock_get(reload_lock);
00268    
00269    if (!(f = fopen(file, "r"))) {
00270       LM_ERR("could not open file '%s'\n", file);
00271       goto err;
00272    }
00273    
00274    /* Array containing each pattern in the file */
00275    if ((patterns = pkg_malloc(sizeof(char*) * max_groups)) == 0) {
00276       LM_ERR("no more memory for patterns\n");
00277       fclose(f);
00278       goto err;
00279    }
00280    for (i=0; i<max_groups; i++) {
00281       patterns[i] = NULL;
00282    }
00283    for (i=0; i<max_groups; i++) {
00284       if ((patterns[i] = pkg_malloc(sizeof(char) * group_max_size)) == 0) {
00285          LM_ERR("no more memory for patterns[%d]\n", i);
00286          fclose(f);
00287          goto err;
00288       }
00289       memset(patterns[i], '\0', group_max_size);
00290    }
00291    
00292    /* Read the file and extract the patterns */
00293    memset(line, '\0', FILE_MAX_LINE);
00294    i = -1;
00295    while (fgets(line, FILE_MAX_LINE, f) != NULL) {
00296       
00297       /* Ignore comments and lines starting by space, tab, CR, LF */
00298       if(isspace(line[0]) || line[0]=='#') {
00299          memset(line, '\0', FILE_MAX_LINE);
00300          continue;
00301       }
00302       
00303       /* First group */
00304       if (i == -1 && line[0] != '[') {
00305          LM_ERR("first group must be initialized with [0] before any regular expression\n");
00306          fclose(f);
00307          goto err;
00308       }
00309       
00310       /* New group */
00311       if (line[0] == '[') {
00312          i++;
00313          /* Check if there are more patterns than the max value */
00314          if (i >= max_groups) {
00315             LM_ERR("max patterns exceded\n");
00316             fclose(f);
00317             goto err;
00318          }
00319          /* Start the regular expression with '(' */
00320          patterns[i][0] = '(';
00321          memset(line, '\0', FILE_MAX_LINE);
00322          continue;
00323       }
00324       
00325       /* Check if the patter size is too big (aprox) */
00326       if (strlen(patterns[i]) + strlen(line) >= group_max_size - 2) {
00327          LM_ERR("pattern max file exceded\n");
00328          fclose(f);
00329          goto err;
00330       }
00331       
00332       /* Append ')' at the end of the line */
00333       if (line[strlen(line) - 1] == '\n') {
00334          line[strlen(line)] = line[strlen(line) - 1];
00335          line[strlen(line) - 2] = ')';
00336       } else {
00337          /* This is the last char in the file and it's not \n */
00338          line[strlen(line)] = ')';
00339       }
00340       
00341       /* Append '(' at the beginning of the line */
00342       memcpy(patterns[i]+strlen(patterns[i]), "(", 1);
00343       
00344       /* Append the line to the current pattern */
00345       memcpy(patterns[i]+strlen(patterns[i]), line, strlen(line));
00346       
00347       memset(line, '\0', FILE_MAX_LINE);
00348    }
00349    num_pcres_tmp = i + 1;
00350    
00351    fclose(f);
00352    
00353    /* Fix the patterns */
00354    for (i=0; i < num_pcres_tmp; i++) {
00355       
00356       /* Convert empty groups in unmatcheable regular expression ^$ */
00357       if (strlen(patterns[i]) == 1) {
00358          patterns[i][0] = '^';
00359          patterns[i][1] = '$';
00360          patterns[i][2] = '\0';
00361          continue;
00362       }
00363       
00364       /* Delete possible '\n' at the end of the pattern */
00365       if (patterns[i][strlen(patterns[i])-1] == '\n') {
00366          patterns[i][strlen(patterns[i])-1] = '\0';
00367       }
00368       
00369       /* Replace '\n' with '|' (except at the end of the pattern) */
00370       for (j=0; j < strlen(patterns[i]); j++) {
00371          if (patterns[i][j] == '\n' && j != strlen(patterns[i])-1) {
00372             patterns[i][j] = '|';
00373          }
00374       }
00375       
00376       /* Add ')' at the end of the pattern */
00377       patterns[i][strlen(patterns[i])] = ')';
00378    }
00379    
00380    /* Log the group patterns */
00381    LM_NOTICE("num groups = %d\n\n", num_pcres_tmp);
00382    for (i=0; i < num_pcres_tmp; i++) {
00383       LM_NOTICE("<group[%d]>%s</group[%d]> (size = %i)\n", i, patterns[i], i, (int)strlen(patterns[i]));
00384    }
00385    
00386    /* Temporal pointer of pcres */
00387    if ((pcres_tmp = pkg_malloc(sizeof(pcre *) * num_pcres_tmp)) == 0) {
00388       LM_ERR("no more memory for pcres_tmp\n");
00389       goto err;
00390    }
00391    for (i=0; i<num_pcres_tmp; i++) {
00392       pcres_tmp[i] = NULL;
00393    }
00394    
00395    /* Compile the patters */
00396    for (i=0; i<num_pcres_tmp; i++) {
00397    
00398       pcre_tmp = pcre_compile(patterns[i], pcre_options, &pcre_error, &pcre_erroffset, NULL);
00399       if (pcre_tmp == NULL) {
00400          LM_ERR("pcre_tmp compilation of '%s' failed at offset %d: %s\n", patterns[i], pcre_erroffset, pcre_error);
00401          goto err;
00402       }
00403       pcre_rc = pcre_fullinfo(pcre_tmp, NULL, PCRE_INFO_SIZE, &pcre_size);
00404       if (pcre_rc) {
00405          printf("pcre_fullinfo on compiled pattern[%i] yielded error: %d\n", i, pcre_rc);
00406          goto err;
00407       }
00408       
00409       if ((pcres_tmp[i] = pkg_malloc(pcre_size)) == 0) {
00410          LM_ERR("no more memory for pcres_tmp[%i]\n", i);
00411          goto err;
00412       }
00413       
00414       memcpy(pcres_tmp[i], pcre_tmp, pcre_size);
00415       pcre_free(pcre_tmp);
00416       pkg_free(patterns[i]);
00417    }
00418    
00419    /* Copy to shared memory */
00420    if (action == RELOAD) {
00421       for(i=0; i<*num_pcres; i++) {  /* Use the previous num_pcres value */
00422          if (pcres[i]) {
00423             shm_free(pcres[i]);
00424          }
00425       }
00426       shm_free(pcres);
00427    }
00428    if ((pcres = shm_malloc(sizeof(pcre *) * num_pcres_tmp)) == 0) {
00429       LM_ERR("no more memory for pcres\n");
00430       goto err;
00431    }
00432    for (i=0; i<num_pcres_tmp; i++) {
00433       pcres[i] = NULL;
00434    }
00435    for (i=0; i<num_pcres_tmp; i++) {
00436       pcre_rc = pcre_fullinfo(pcres_tmp[i], NULL, PCRE_INFO_SIZE, &pcre_size);
00437       if ((pcres[i] = shm_malloc(pcre_size)) == 0) {
00438          LM_ERR("no more memory for pcres[%i]\n", i);
00439          goto err;
00440       }
00441       memcpy(pcres[i], pcres_tmp[i], pcre_size);
00442    }
00443    *num_pcres = num_pcres_tmp;
00444    *pcres_addr = pcres;
00445 
00446    /* Free used memory */
00447    for (i=0; i<num_pcres_tmp; i++) {
00448       pkg_free(pcres_tmp[i]);
00449    }
00450    pkg_free(pcres_tmp);
00451    pkg_free(patterns);
00452    lock_release(reload_lock);
00453    
00454    return 0;
00455    
00456    
00457 err:
00458    if (patterns) {
00459       for(i=0; i<max_groups; i++) {
00460          if (patterns[i]) {
00461             pkg_free(patterns[i]);
00462          }
00463       }
00464       pkg_free(patterns);
00465    }
00466    if (pcres_tmp) {
00467       for (i=0; i<num_pcres_tmp; i++) {
00468          if (pcres_tmp[i]) {
00469             pkg_free(pcres_tmp[i]);
00470          }
00471       }
00472       pkg_free(pcres_tmp);
00473    }
00474    if (reload_lock) {
00475       lock_release(reload_lock);
00476    }
00477    if (action == START) {
00478       free_shared_memory();
00479    }
00480    return -1;
00481 }
00482 
00483 
00484 static void free_shared_memory(void)
00485 {
00486    int i;
00487    
00488    if (pcres) {
00489       for(i=0; i<*num_pcres; i++) {
00490          if (pcres[i]) {
00491             shm_free(pcres[i]);
00492          }
00493       }
00494       shm_free(pcres);
00495    }
00496    
00497    if (num_pcres) {
00498       shm_free(num_pcres);
00499    }
00500    
00501    if (pcres_addr) {
00502       shm_free(pcres_addr);
00503    }
00504    
00505    if (reload_lock) {
00506       lock_destroy(reload_lock);
00507       lock_dealloc(reload_lock);
00508     }
00509 }
00510 
00511 
00512 /*
00513  * Script functions
00514  */
00515 
00516 /*! \brief Return true if the argument matches the regular expression parameter */
00517 static int w_pcre_match(struct sip_msg* _msg, char* _s1, char* _s2)
00518 {
00519    str string;
00520    str regex;
00521    pcre *pcre_re = NULL;
00522    int pcre_rc;
00523    const char *pcre_error;
00524    int pcre_erroffset;
00525    
00526    if (_s1 == NULL) {
00527       LM_ERR("bad parameters\n");
00528       return -2;
00529    }
00530    
00531    if (_s2 == NULL) {
00532       LM_ERR("bad parameters\n");
00533       return -2;
00534    }
00535    
00536    if (fixup_get_svalue(_msg, (gparam_p)_s1, &string))
00537    {
00538       LM_ERR("cannot print the format for string\n");
00539       return -3;
00540    }
00541    if (fixup_get_svalue(_msg, (gparam_p)_s2, &regex))
00542    {
00543       LM_ERR("cannot print the format for regex\n");
00544       return -3;
00545    }
00546    
00547    pcre_re = pcre_compile(regex.s, pcre_options, &pcre_error, &pcre_erroffset, NULL);
00548    if (pcre_re == NULL) {
00549       LM_ERR("pcre_re compilation of '%s' failed at offset %d: %s\n", regex.s, pcre_erroffset, pcre_error);
00550       return -4;
00551    }
00552    
00553    pcre_rc = pcre_exec(
00554       pcre_re,                    /* the compiled pattern */
00555       NULL,                       /* no extra data - we didn't study the pattern */
00556       string.s,                   /* the matching string */
00557       (int)(string.len),          /* the length of the subject */
00558       0,                          /* start at offset 0 in the string */
00559       0,                          /* default options */
00560       NULL,                       /* output vector for substring information */
00561       0);                         /* number of elements in the output vector */
00562    
00563    /* Matching failed: handle error cases */
00564    if (pcre_rc < 0) {
00565       switch(pcre_rc) {
00566          case PCRE_ERROR_NOMATCH:
00567             LM_DBG("'%s' doesn't match '%s'\n", string.s, regex.s);
00568             break;
00569          default:
00570             LM_DBG("matching error '%d'\n", pcre_rc);
00571             break;
00572       }
00573       return -1;
00574    }
00575 
00576    LM_DBG("'%s' matches '%s'\n", string.s, regex.s);
00577    return 1;
00578 }
00579 
00580 
00581 /*! \brief Return true if the string argument matches the pattern group parameter */
00582 static int w_pcre_match_group(struct sip_msg* _msg, char* _s1, char* _s2)
00583 {
00584    str string;
00585    int num_pcre;
00586    int pcre_rc;
00587    
00588    /* Check if group matching feature is enabled */
00589    if (file == NULL) {
00590       LM_ERR("group matching is disabled\n");
00591       return -2;
00592    }
00593    
00594    if (_s1 == NULL) {
00595       LM_ERR("bad parameters\n");
00596       return -3;
00597    }
00598    
00599    if (_s2 == NULL) {
00600       num_pcre = 0;
00601    } else {
00602       num_pcre = (uint)(long)_s2;
00603    }
00604    
00605    if (num_pcre >= *num_pcres) {
00606       LM_ERR("invalid pcre index '%i', there are %i pcres\n", num_pcre, *num_pcres);
00607       return -4;
00608    }
00609    
00610    if (fixup_get_svalue(_msg, (gparam_p)_s1, &string))
00611    {
00612       LM_ERR("cannot print the format\n");
00613       return -5;
00614    }
00615    
00616    lock_get(reload_lock);
00617    
00618    pcre_rc = pcre_exec(
00619       (*pcres_addr)[num_pcre],    /* the compiled pattern */
00620       NULL,                       /* no extra data - we didn't study the pattern */
00621       string.s,                   /* the matching string */
00622       (int)(string.len),          /* the length of the subject */
00623       0,                          /* start at offset 0 in the string */
00624       0,                          /* default options */
00625       NULL,                       /* output vector for substring information */
00626       0);                         /* number of elements in the output vector */
00627    
00628    lock_release(reload_lock);
00629    
00630    /* Matching failed: handle error cases */
00631    if (pcre_rc < 0) {
00632       switch(pcre_rc) {
00633          case PCRE_ERROR_NOMATCH:
00634             LM_DBG("'%s' doesn't match pcres[%i]\n", string.s, num_pcre);
00635             break;
00636          default:
00637             LM_DBG("matching error '%d'\n", pcre_rc);
00638             break;
00639       }
00640       return -1;
00641    }
00642    else {
00643       LM_DBG("'%s' matches pcres[%i]\n", string.s, num_pcre);
00644       return 1;
00645    }
00646    
00647 }
00648 
00649 
00650 /*
00651  * MI functions
00652  */
00653 
00654 /*! \brief Reload pcres by reading the file again */
00655 static struct mi_root* mi_pcres_reload(struct mi_root* cmd, void* param)
00656 {
00657    /* Check if group matching feature is enabled */
00658    if (file == NULL) {
00659       LM_NOTICE("'file' parameter is not set, group matching disabled\n");
00660       return init_mi_tree(403, MI_SSTR("Group matching not enabled"));
00661    }
00662    
00663    LM_NOTICE("reloading pcres...\n");
00664    if (load_pcres(RELOAD)) {
00665       LM_ERR("failed to reload pcres\n");
00666       return init_mi_tree(500, MI_INTERNAL_ERR_S, MI_INTERNAL_ERR_LEN);
00667    }
00668    LM_NOTICE("reload success\n");
00669    return init_mi_tree(200, MI_OK_S, MI_OK_LEN);
00670 }

Generated on Thu May 24 10:00:30 2012 for Kamailio - The Open Source SIP Server by  doxygen 1.5.6