parse_content.c

Go to the documentation of this file.
00001 /*
00002  * $Id: parse_content.c 5335 2008-12-13 12:18:05Z klaus_darilion $
00003  *
00004  * Copyright (C) 2001-2003 FhG Fokus
00005  *
00006  * This file is part of Kamailio, a free SIP server.
00007  *
00008  * Kamailio is free software; you can redistribute it and/or modify
00009  * it under the terms of the GNU General Public License as published by
00010  * the Free Software Foundation; either version 2 of the License, or
00011  * (at your option) any later version
00012  *
00013  * Kamailio is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License 
00019  * along with this program; if not, write to the Free Software 
00020  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00021  *
00022  * History:
00023  * 2003-08-04 parse_content_type_hdr separates type from subtype inside
00024  * the mime type (bogdan)
00025  * 2003-08-04 CPL subtype added (bogdan)
00026  * 2003-08-05 parse_accept_hdr function added (bogdan)
00027  */
00028 
00029 /*!
00030  * \file
00031  * \brief Content header parser
00032  * \ingroup parser
00033  */
00034 
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
#include "../mem/mem.h"
#include "../dprint.h"
#include "../str.h"
#include "../ut.h"
#include "parse_content.h"
00044 
00045 
/*
 * Character helpers used while walking the mime type/subtype trees.
 *
 * is_mime_char(c)      - non-zero if c may be part of a mime token as
 *                        accepted by this parser: a letter, '-', '+' or '.'.
 * is_char_equal(c, cs) - non-zero if c matches the tree character cs;
 *                        letters are folded to lower case first (the trees
 *                        store lower-case letters), any other character
 *                        must match exactly.
 *
 * The argument is converted to unsigned char before it is handed to
 * isalpha(): passing a negative plain char (any byte >= 0x80 where char is
 * signed) to a <ctype.h> function is undefined behaviour.
 * NOTE: both macros evaluate their first argument more than once; do not
 * pass expressions with side effects.
 */
#define is_mime_char(_c_) \
   (isalpha((unsigned char)(_c_)) || (_c_)=='-' || (_c_)=='+' || (_c_)=='.')
#define is_char_equal(_c_,_cs_) \
   ( isalpha((unsigned char)(_c_)) ? (((_c_)|0x20)==(_cs_)) : ((_c_)==(_cs_)) )
00050 
00051 
/*!
 * One node of a mime lookup tree. The trees are stored as flat arrays of
 * these nodes and hold every type/subtype name known to the parser, one
 * character per node.
 */
struct type_node_s {
   /* character represented by this node */
   char c;
   /* mime type/subtype code reported when the parsed string ends here */
   unsigned char final;
   /* number of sub-nodes (children) below this node */
   unsigned char nr_sons;
   /* array index of the next sibling node; -1 when there is none */
   int next;
};

typedef struct type_node_s type_node_t;
00062 
00063 
00064 static type_node_t type_tree[] = {
00065    {'t',TYPE_UNKNOWN,1,4}, /* 0 */
00066       {'e',TYPE_UNKNOWN,1,-1},
00067          {'x',TYPE_UNKNOWN,1,-1},
00068             {'t',TYPE_TEXT,0,-1},
00069    {'m',TYPE_UNKNOWN,2,19}, /* 4 */
00070       {'e',TYPE_UNKNOWN,1,11}, /* 5 */
00071          {'s',TYPE_UNKNOWN,1,-1},
00072             {'s',TYPE_UNKNOWN,1,-1},
00073                {'a',TYPE_UNKNOWN,1,-1},
00074                   {'g',TYPE_UNKNOWN,1,-1},
00075                      {'e',TYPE_MESSAGE,0,-1},
00076       {'u',TYPE_UNKNOWN,1,-1}, /* 11 */
00077          {'l',TYPE_UNKNOWN,1,-1},
00078             {'t',TYPE_UNKNOWN,1,-1},
00079                {'i',TYPE_UNKNOWN,1,-1},
00080                   {'p',TYPE_UNKNOWN,1,-1},
00081                      {'a',TYPE_UNKNOWN,1,-1},
00082                         {'r',TYPE_UNKNOWN,1,-1},
00083                            {'t',TYPE_MULTIPART,0,-1},
00084    {'a',TYPE_UNKNOWN,1,-1}, /* 19 */
00085       {'p',TYPE_UNKNOWN,1,-1},
00086          {'p',TYPE_UNKNOWN,1,-1},
00087             {'l',TYPE_UNKNOWN,1,-1},
00088                {'i',TYPE_UNKNOWN,1,-1},
00089                   {'c',TYPE_UNKNOWN,1,-1},
00090                      {'a',TYPE_UNKNOWN,1,-1},
00091                         {'t',TYPE_UNKNOWN,1,-1},
00092                            {'i',TYPE_UNKNOWN,1,-1},
00093                               {'o',TYPE_UNKNOWN,1,-1},
00094                                  {'n',TYPE_APPLICATION,0,-1},
00095    };
00096 
00097 
00098 static type_node_t subtype_tree[] = {
00099         {'p',SUBTYPE_UNKNOWN,2,12},  /* 0 */
00100       {'l',SUBTYPE_UNKNOWN,1,5},
00101          {'a',SUBTYPE_UNKNOWN,1,-1},
00102             {'i',SUBTYPE_UNKNOWN,1,-1},
00103                {'n',SUBTYPE_PLAIN,0,-1},
00104       {'i',SUBTYPE_UNKNOWN,1,-1}, /* 5 */
00105          {'d',SUBTYPE_UNKNOWN,1,-1},
00106             {'f',SUBTYPE_UNKNOWN,1,-1},
00107                {'+',SUBTYPE_UNKNOWN,1,-1},
00108                   {'x',SUBTYPE_UNKNOWN,1,-1},
00109                      {'m',SUBTYPE_UNKNOWN,1,-1},
00110                         {'l',SUBTYPE_PIDFXML,0,-1},
00111    {'s',SUBTYPE_UNKNOWN,2,36}, /* 12 */
00112       {'d',SUBTYPE_UNKNOWN,1,15},
00113          {'p',SUBTYPE_SDP,0,-1},
00114            {'i',SUBTYPE_UNKNOWN,1,-1},  /* 15 */
00115                    {'m',SUBTYPE_UNKNOWN,1,-1},
00116                        {'p',SUBTYPE_UNKNOWN,1,-1},
00117                            {'l',SUBTYPE_UNKNOWN,1,-1},
00118                                {'e',SUBTYPE_UNKNOWN,1,-1},
00119                                    {'-',SUBTYPE_UNKNOWN,1,-1},
00120                                        {'m',SUBTYPE_UNKNOWN,1,-1},
00121                                            {'e',SUBTYPE_UNKNOWN,1,-1},
00122                                                {'s',SUBTYPE_UNKNOWN,1,-1},
00123                                                    {'s',SUBTYPE_UNKNOWN,1,-1},
00124                                                        {'a',SUBTYPE_UNKNOWN,1,-1},
00125                                                            {'g',SUBTYPE_UNKNOWN,1,-1},
00126                                                                {'e',SUBTYPE_UNKNOWN,1,-1},
00127                                                                    {'-',SUBTYPE_UNKNOWN,1,-1},
00128                                                                        {'s',SUBTYPE_UNKNOWN,1,-1},
00129                                                                            {'u',SUBTYPE_UNKNOWN,1,-1},
00130                                                                                {'m',SUBTYPE_UNKNOWN,1,-1},
00131                                                                                    {'m',SUBTYPE_UNKNOWN,1,-1},
00132                                                                                        {'a',SUBTYPE_UNKNOWN,1,-1},
00133                                                                                            {'r',SUBTYPE_UNKNOWN,1,-1},
00134                                                                                                {'y',SUBTYPE_SMS,0,-1},
00135    {'c',SUBTYPE_UNKNOWN,1,45}, /* 36 */
00136       {'p',SUBTYPE_UNKNOWN,2,-1},
00137          {'i',SUBTYPE_UNKNOWN,1,40},
00138             {'m',SUBTYPE_CPIM,0,-1},
00139                    {'l',SUBTYPE_UNKNOWN,1,-1}, /* 40 */
00140             {'+',SUBTYPE_UNKNOWN,1,-1},
00141                {'x',SUBTYPE_UNKNOWN,1,-1},
00142                   {'m',SUBTYPE_UNKNOWN,1,-1},
00143                      {'l',SUBTYPE_CPLXML,0,-1},
00144    {'r',SUBTYPE_UNKNOWN,2,59}, /* 45 */
00145       {'l',SUBTYPE_UNKNOWN,1,53},
00146          {'m',SUBTYPE_UNKNOWN,1,-1},
00147             {'i',SUBTYPE_UNKNOWN,1,-1},
00148                {'+',SUBTYPE_UNKNOWN,1,-1},
00149                   {'x',SUBTYPE_UNKNOWN,1,-1},
00150                      {'m',SUBTYPE_UNKNOWN,1,-1},
00151                         {'l',SUBTYPE_RLMIXML,0,-1},
00152       {'e',SUBTYPE_UNKNOWN,1,-1}, /* 53 */
00153          {'l',SUBTYPE_UNKNOWN,1,-1},
00154             {'a',SUBTYPE_UNKNOWN,1,-1},
00155                {'t',SUBTYPE_UNKNOWN,1,-1},
00156                   {'e',SUBTYPE_UNKNOWN,1,-1},
00157                      {'d',SUBTYPE_RELATED,0,-1},
00158    {'l',SUBTYPE_UNKNOWN,1,68}, /* 59 */
00159       {'p',SUBTYPE_UNKNOWN,1,-1},
00160          {'i',SUBTYPE_UNKNOWN,1,-1},
00161             {'d',SUBTYPE_UNKNOWN,1,-1},
00162                {'f',SUBTYPE_UNKNOWN,1,-1},
00163                   {'+',SUBTYPE_UNKNOWN,1,-1},
00164                      {'x',SUBTYPE_UNKNOWN,1,-1},
00165                         {'m',SUBTYPE_UNKNOWN,1,-1},
00166                            {'l',SUBTYPE_LPIDFXML,0,-1},
00167    {'w',SUBTYPE_UNKNOWN,1,83}, /* 68 */
00168       {'a',SUBTYPE_UNKNOWN,1,-1},
00169          {'t',SUBTYPE_UNKNOWN,1,-1},
00170             {'c',SUBTYPE_UNKNOWN,1,-1},
00171                {'h',SUBTYPE_UNKNOWN,1,-1},
00172                   {'e',SUBTYPE_UNKNOWN,1,-1},
00173                      {'r',SUBTYPE_UNKNOWN,1,-1},
00174                         {'i',SUBTYPE_UNKNOWN,1,-1},
00175                            {'n',SUBTYPE_UNKNOWN,1,-1},
00176                               {'f',SUBTYPE_UNKNOWN,1,-1},
00177                                  {'o',SUBTYPE_UNKNOWN,1,-1},
00178                                     {'+',SUBTYPE_UNKNOWN,1,-1},
00179                                        {'x',SUBTYPE_UNKNOWN,1,-1},
00180                                           {'m',SUBTYPE_UNKNOWN,1,-1},
00181                                              {'l',SUBTYPE_WATCHERINFOXML,0,-1},
00182    {'x',SUBTYPE_UNKNOWN,2,105}, /* 83 */
00183       {'p',SUBTYPE_UNKNOWN,1,92},
00184          {'i',SUBTYPE_UNKNOWN,1,-1},
00185             {'d',SUBTYPE_UNKNOWN,1,-1},
00186                {'f',SUBTYPE_UNKNOWN,1,-1},
00187                   {'+',SUBTYPE_UNKNOWN,1,-1},
00188                      {'x',SUBTYPE_UNKNOWN,1,-1},
00189                         {'m',SUBTYPE_UNKNOWN,1,-1},
00190                            {'l',SUBTYPE_XPIDFXML,0,-1},
00191       {'m',SUBTYPE_UNKNOWN,1,-1}, /* 92 */
00192          {'l',SUBTYPE_UNKNOWN,1,-1},
00193             {'+',SUBTYPE_UNKNOWN,1,-1},
00194                {'m',SUBTYPE_UNKNOWN,1,-1},
00195                   {'s',SUBTYPE_UNKNOWN,1,-1},
00196                      {'r',SUBTYPE_UNKNOWN,1,-1},
00197                         {'t',SUBTYPE_UNKNOWN,1,-1},
00198                            {'c',SUBTYPE_UNKNOWN,1,-1},
00199                               {'.',SUBTYPE_UNKNOWN,1,-1},
00200                                  {'p',SUBTYPE_UNKNOWN,1,-1},
00201                                     {'i',SUBTYPE_UNKNOWN,1,-1}, 
00202                                        {'d',SUBTYPE_UNKNOWN,1,-1},
00203                                           {'f',SUBTYPE_XML_MSRTC_PIDF,0,-1},
00204    {'e',SUBTYPE_UNKNOWN,1,118}, /* 105 */
00205       {'x',SUBTYPE_UNKNOWN,1,-1},
00206          {'t',SUBTYPE_UNKNOWN,1,-1},
00207             {'e',SUBTYPE_UNKNOWN,1,-1},
00208                {'r',SUBTYPE_UNKNOWN,1,-1},
00209                   {'n',SUBTYPE_UNKNOWN,1,-1},
00210                      {'a',SUBTYPE_UNKNOWN,1,-1},
00211                         {'l',SUBTYPE_UNKNOWN,1,-1},
00212                            {'-',SUBTYPE_UNKNOWN,1,-1},
00213                               {'b',SUBTYPE_UNKNOWN,1,-1},
00214                                  {'o',SUBTYPE_UNKNOWN,1,-1},
00215                                     {'d',SUBTYPE_UNKNOWN,1,-1},
00216                                        {'y',SUBTYPE_EXTERNAL_BODY,0,-1},
00217    {'m',SUBTYPE_UNKNOWN,1,-1}, /* 118 */
00218             {'i',SUBTYPE_UNKNOWN,1,-1},
00219          {'x',SUBTYPE_UNKNOWN,1,-1},
00220             {'e',SUBTYPE_UNKNOWN,1,-1},
00221                {'d',SUBTYPE_MIXED,0,-1},
00222 
00223         };
00224 
00225 
00226 
/*!
 * \brief Skip white space inside a Content-Length header body
 *
 * Skips spaces, tabs and a line feed that is followed by a space or tab
 * (a folded line). The look-ahead is bounds-checked, so no byte at or
 * beyond \p end is ever read.
 *
 * \param p   current position
 * \param end first byte past the parseable data
 * \return first position that is not white space (may be \p end)
 */
static char* content_length_skip_lws(char *p, char *end)
{
   while (p<end && (*p==' ' || *p=='\t' ||
         (*p=='\n' && p+1<end && (*(p+1)==' ' || *(p+1)=='\t'))))
      p++;
   return p;
}


/*!
 * \brief Parse the body of a Content-Length header
 *
 * Accepted form: optional white space, one or more decimal digits,
 * optional white space, then LF or CRLF.
 *
 * \param buffer start of the header body
 * \param end    first byte past the parseable data
 * \param length receives the parsed value; written only on success
 * \return pointer just past the terminating LF, or 0 on error (no digits,
 *         value larger than INT_MAX, missing line end, or data exhausted
 *         before the line end was seen)
 */
char* parse_content_length( char* buffer, char* end, int* length)
{
   int number;
   int digit;
   int size;
   char *p;

   /* search the beginning of the number */
   p = content_length_skip_lws(buffer, end);
   if (p==end)
      goto error;

   /* parse the number */
   size = 0;
   number = 0;
   while (p<end && *p>='0' && *p<='9') {
      digit = *p - '0';
      /* refuse values that do not fit into an int instead of running
       * into signed overflow, which is undefined behaviour */
      if (number > (INT_MAX - digit)/10)
         goto error;
      number = number*10 + digit;
      size++;
      p++;
   }
   if (p==end || size==0)
      goto error;

   /* only white space may follow the number */
   p = content_length_skip_lws(p, end);
   if (p==end)
      goto error;

   /* the header must end with LF or CRLF; the byte after a CR is only
    * inspected if it is still inside the buffer */
   if (*p=='\n') {
      p++;
   } else if (*p=='\r' && p+1<end && *(p+1)=='\n') {
      p += 2;
   } else {
      goto error;
   }

   *length = number;
   return p;

error:
   /* p may be equal to end here; it must not be dereferenced then */
   if (p<end) {
      LM_ERR("parse error near char [%d][%c]\n",*p,*p);
   } else {
      LM_ERR("parse error: unexpected end of buffer\n");
   }
   return 0;
}
00266 
00267 
00268 
00269 char* decode_mime_type(char *start, char *end, unsigned int *mime_type)
00270 {
00271    int node;
00272    char *mark;
00273    char *p;
00274 
00275    p = start;
00276 
00277    /* search the begining of the type */
00278    while ( p<end && (*p==' ' || *p=='\t' ||
00279    (*p=='\n' && (*(p+1)==' '||*(p+1)=='\t')) ))
00280       p++;
00281    if (p==end)
00282       goto error;
00283 
00284    /* parse the type */
00285    if (*p=='*') {
00286       *mime_type = TYPE_ALL<<16;
00287       p++;
00288    } else {
00289       node = 0;
00290       mark = p;
00291       while (p<end && is_mime_char(*p)  ) {
00292          while ( node!=-1 && !is_char_equal(*p,type_tree[node].c) ){
00293             node = type_tree[node].next;
00294          }
00295          if (node!=-1 && type_tree[node].nr_sons)
00296             node++;
00297          p++;
00298       }
00299       if (p==end || mark==p)
00300          goto error;
00301       if (node!=-1)
00302          *mime_type = type_tree[node].final<<16;
00303       else
00304          *mime_type = TYPE_UNKNOWN<<16;
00305    }
00306 
00307    /* search the '/' separator */
00308    while ( p<end && (*p==' ' || *p=='\t' ||
00309    (*p=='\n' && (*(p+1)==' '||*(p+1)=='\t')) ))
00310       p++;
00311    if ( p==end || *(p++)!='/')
00312       goto error;
00313 
00314    /* search the begining of the sub-type */
00315    while ( p<end && (*p==' ' || *p=='\t' ||
00316    (*p=='\n' && (*(p+1)==' '||*(p+1)=='\t')) ))
00317       p++;
00318    if (p==end)
00319       goto error;
00320 
00321    /* parse the sub-type */
00322    if (*p=='*') {
00323       *mime_type |= SUBTYPE_ALL;
00324       p++;
00325    } else {
00326       node = 0;
00327       mark = p;
00328       while (p<end && (is_mime_char(*p) || *p == '_')) {
00329          while(node!=-1 && !is_char_equal(*p,subtype_tree[node].c) )
00330             node = subtype_tree[node].next;
00331          if (node!=-1 && subtype_tree[node].nr_sons)
00332             node++;
00333          p++;
00334       }
00335       if (p==mark)
00336          goto error;
00337       if (node!=-1)
00338          *mime_type |= subtype_tree[node].final;
00339       else
00340          *mime_type |= SUBTYPE_UNKNOWN;
00341    }
00342 
00343    /* now its possible to have some spaces */
00344    while ( p<end && (*p==' ' || *p=='\t' ||
00345    (*p=='\n' && (*(p+1)==' '||*(p+1)=='\t')) ))
00346       p++;
00347 
00348    /* if there are params, ignore them!! -> eat everything to
00349     * the end or to the first ',' */
00350    if ( p<end && *p==';' )
00351       for(p++; p<end && *p!=','; p++);
00352 
00353    /* is this the correct end? */
00354    if (p!=end && *p!=',' )
00355       goto error;
00356 
00357    /* check the format of the decoded mime */
00358    if ((*mime_type)>>16==TYPE_ALL && ((*mime_type)&0x00ff)!=SUBTYPE_ALL) {
00359       LM_ERR("invalid mime format found "
00360          " <*/submime> in [%.*s]!!\n", (int)(end-start),start);
00361       return 0;
00362    }
00363 
00364    return p;
00365 error:
00366    LM_ERR("parse error near in [%.*s] char"
00367       "[%d][%c] offset=%d\n", (int)(end-start),start,*p,*p,(int)(p-start));
00368    return 0;
00369 }
00370 
00371 
00372 
00373 /* returns: > 0 mime found
00374  *          = 0 hdr not found
00375  *          =-1 error */
00376 int parse_content_type_hdr( struct sip_msg *msg )
00377 {
00378    char *end;
00379    char *ret;
00380    unsigned int  mime;
00381 
00382    /* is the header already found? */
00383    if ( msg->content_type==0 ) {
00384       /* if not, found it */
00385       if ( parse_headers(msg, HDR_CONTENTTYPE_F, 0)==-1)
00386          goto error;
00387       if ( msg->content_type==0 ) {
00388          LM_DBG("missing Content-Type header\n");
00389          return 0;
00390       }
00391    }
00392 
00393    /* maybe the header is already parsed! */
00394    if ( msg->content_type->parsed!=0)
00395       return get_content_type(msg);
00396 
00397    /* it seams we have to parse it! :-( */
00398    end = msg->content_type->body.s + msg->content_type->body.len;
00399    ret = decode_mime_type(msg->content_type->body.s, end , &mime);
00400    if (ret==0)
00401       goto error;
00402    if (ret!=end) {
00403       LM_ERR("the header CONTENT_TYPE contains "
00404          "more then one mime type :-(!\n");
00405       goto error;
00406    }
00407    if ((mime&0x00ff)==SUBTYPE_ALL || (mime>>16)==TYPE_ALL) {
00408       LM_ERR("invalid mime with wildcard '*' in Content-Type hdr!\n");
00409       goto error;
00410    }
00411 
00412    msg->content_type->parsed = (void*)(unsigned long)mime;
00413    return mime;
00414 
00415 error:
00416    return -1;
00417 }
00418 
00419 
00420 
00421 /* returns: > 0 ok
00422  *          = 0 hdr not found
00423  *          = -1 error */
00424 int parse_accept_hdr( struct sip_msg *msg )
00425 {
00426    static unsigned int mimes[MAX_MIMES_NR];
00427    int nr_mimes;
00428    unsigned int mime;
00429    char *end;
00430    char *ret;
00431 
00432    /* is the header already found? */
00433    if ( msg->accept==0 ) {
00434       /* if not, found it */
00435       if ( parse_headers(msg, HDR_ACCEPT_F, 0)==-1)
00436          goto error;
00437       if ( msg->accept==0 ) {
00438          LM_DBG("missing Accept header\n");
00439          return 0;
00440       }
00441    }
00442 
00443    /* maybe the header is already parsed! */
00444    if ( msg->accept->parsed!=0)
00445       return 1;
00446 
00447    /* it seams we have to parse it! :-( */
00448    ret = msg->accept->body.s;
00449    end = ret + msg->accept->body.len;
00450    nr_mimes = 0;
00451    while (1){
00452       ret = decode_mime_type(ret, end , &mime);
00453       if (ret==0)
00454          goto error;
00455       /* a new mime was found  -> put it into array */
00456       if (nr_mimes==MAX_MIMES_NR) {
00457          LM_ERR("accept hdr contains more than"
00458             " %d mime type -> buffer overflow!!\n",MAX_MIMES_NR);
00459          goto error;
00460       }
00461       mimes[nr_mimes++] = mime;
00462       /* is another mime following? */
00463       if (ret==end )
00464          break;
00465       /* parse the mime separator ',' */
00466       if (*ret!=',' || ret+1==end) {
00467          LM_ERR("parse error between mimes at "
00468             "char <%x> (offset=%d) in <%.*s>!\n",
00469             *ret, (int)(ret-msg->accept->body.s),
00470             msg->accept->body.len, msg->accept->body.s);
00471          goto error;
00472       }
00473       /* skip the ',' */
00474       ret++;
00475    }
00476 
00477    /* copy and link the mime buffer into the message */
00478    msg->accept->parsed = (void*)pkg_malloc((nr_mimes+1)*sizeof(int));
00479    if (msg->accept->parsed==0) {
00480       LM_ERR("no more pkg memory\n");
00481       goto error;
00482    }
00483    memcpy(msg->accept->parsed,mimes,nr_mimes*sizeof(int));
00484    /* make the buffer null terminated */
00485    ((int*)msg->accept->parsed)[nr_mimes] = 0;
00486 
00487    return 1;
00488 error:
00489    return -1;
00490 }
00491 

Generated on Thu May 24 00:00:28 2012 for Kamailio - The Open Source SIP Server by  doxygen 1.5.6