ITS
xer_support.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2003, 2004 X/IO Labs, xiolabs.com.
3  * Copyright (c) 2003, 2004, 2005 Lev Walkin <vlm@lionet.info>.
4  * All rights reserved.
5  * Redistribution and modifications are permitted subject to BSD license.
6  */
7 #include <asn_system.h>
8 #include <xer_support.h>
9 
/*
 * Parser states.
 * pxml_parse() stores the current state as an int in *stateContext between
 * calls, so the order of the enumerators is part of the saved-state format.
 * ST_TEXT is the initial state.
 */
typedef enum {
	ST_TEXT,		/* Plain text outside of any markup */
	ST_TAG_START,		/* '<' has just been seen */
	ST_TAG_BODY,		/* Inside a tag */
	ST_TAG_QUOTE_WAIT,	/* After '=' inside a tag */
	ST_TAG_QUOTED_STRING,	/* Inside a double-quoted attribute value */
	ST_TAG_UNQUOTED_STRING,	/* Inside an unquoted attribute value */
	ST_COMMENT_WAIT_DASH1,	/* "<!--"[1] */
	ST_COMMENT_WAIT_DASH2,	/* "<!--"[2] */
	ST_COMMENT,		/* Inside the comment body */
	ST_COMMENT_CLO_DASH2,	/* "-->"[0] */
	ST_COMMENT_CLO_RT	/* "-->"[1] */
} pstate_e;
24 
26  PXML_TEXT,
31 };
32 
33 
/*
 * Character classification table, indexed by the byte value:
 *   0 - none of the classes below (this includes every byte >= 0x80),
 *   1 - whitespace: TAB, LF, FF, CR and SPACE,
 *   2 - decimal digit,
 *   3 - ASCII letter.
 * Only the first 128 entries are spelled out; the remaining ones are
 * implicitly initialized to zero. The table is never written to, hence const.
 */
static const int
_charclass[256] = {
	0,0,0,0,0,0,0,0, 0,1,1,0,1,1,0,0,
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
	1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
	2,2,2,2,2,2,2,2, 2,2,0,0,0,0,0,0,	/* 01234567 89       */
	0,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,	/*  ABCDEFG HIJKLMNO */
	3,3,3,3,3,3,3,3, 3,3,3,0,0,0,0,0,	/* PQRSTUVW XYZ      */
	0,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,	/*  abcdefg hijklmno */
	3,3,3,3,3,3,3,3, 3,3,3,0,0,0,0,0	/* pqrstuvw xyz      */
};

/*
 * Classification predicates. The argument is converted to unsigned char
 * before indexing, so a negative (signed) char never reads out of bounds.
 */
#define	WHITESPACE(c)	(_charclass[(unsigned char)(c)] == 1)
#define	ALNUM(c)	(_charclass[(unsigned char)(c)] >= 2)
#define	ALPHA(c)	(_charclass[(unsigned char)(c)] == 3)
48 
/*
 * Aliases for characters, ASCII/UTF-8.
 * Kept as enumerators rather than macros: they are plain int constants,
 * remain valid in case labels, and are visible to a debugger.
 */
enum {
	EXCLAM = 0x21,	/* '!' */
	CQUOTE = 0x22,	/* '"' */
	CDASH  = 0x2d,	/* '-' */
	CSLASH = 0x2f,	/* '/' */
	LANGLE = 0x3c,	/* '<' */
	CEQUAL = 0x3d,	/* '=' */
	RANGLE = 0x3e,	/* '>' */
	CQUEST = 0x3f	/* '?' */
};
58 
/*
 * Invoke token callback.
 *
 * The macro is meant to be expanded inside pxml_parse() only. It relies on
 * the following names being in scope at the point of expansion:
 *   p, chunk_start - current position and start of the pending chunk,
 *   state          - the parser state variable to be updated,
 *   cb, key        - the callback and its opaque argument,
 *   finish         - a label to jump to when parsing has to stop.
 *
 * Arguments:
 *   type          - chunk type passed to the callback,
 *   _ns           - state to switch to once the chunk is delivered,
 *   _current_too  - 1 if the byte at `p` belongs to the chunk, 0 otherwise,
 *   _final        - accepted for symmetry, not used by the expansion.
 *
 * Behavior:
 *   - an empty chunk is not reported; only the state is switched;
 *   - if the callback returns less than the chunk size, control jumps to
 *     `finish` without advancing chunk_start. The state is switched first
 *     only when the current byte was included and the callback returned -1;
 *   - otherwise chunk_start moves past the delivered chunk and the state
 *     is switched.
 *
 * Note that the `break` below leaves the do/while(0) wrapper only,
 * not any switch or loop surrounding the expansion.
 */
#define	TOKEN_CB_CALL(type, _ns, _current_too, _final)	do {		\
		int _ret;						\
		pstate_e ns = (_ns);					\
		ssize_t _sz = (p - chunk_start) + (_current_too);	\
		if(!_sz) {						\
			/* Shortcut: nothing to report */		\
			state = ns;					\
			break;						\
		}							\
		_ret = cb((type), chunk_start, _sz, key);		\
		if(_ret < _sz) {					\
			if((_current_too) && _ret == -1)		\
				state = ns;				\
			goto finish;					\
		}							\
		chunk_start = p + (_current_too);			\
		state = ns;						\
	} while(0)

/* Report a chunk using the chunk type exactly as given. */
#define	TOKEN_CB(_type, _ns, _current_too)				\
	TOKEN_CB_CALL((_type), (_ns), (_current_too), 0)

/*
 * Report a chunk which completes an element: the chunk type is first
 * translated through the final_chunk_type[] table.
 */
#define	TOKEN_CB_FINAL(_type, _ns, _current_too)			\
	TOKEN_CB_CALL(final_chunk_type[(_type)], (_ns), (_current_too), 1)
84 
85 /*
86  * Parser itself
87  */
88 ssize_t pxml_parse(int *stateContext, const void *xmlbuf, size_t size, pxml_callback_f *cb, void *key) {
89  pstate_e state = (pstate_e)*stateContext;
90  const char *chunk_start = (const char *)xmlbuf;
91  const char *p = chunk_start;
92  const char *end = p + size;
93 
94  for(; p < end; p++) {
95  int C = *(const unsigned char *)p;
96  switch(state) {
97  case ST_TEXT:
98  /*
99  * Initial state: we're in the middle of some text,
100  * or just have started.
101  */
102  if (C == LANGLE)
103  /* We're now in the tag, probably */
105  break;
106  case ST_TAG_START:
107  if (ALPHA(C) || (C == CSLASH))
108  state = ST_TAG_BODY;
109  else if (C == EXCLAM)
110  state = ST_COMMENT_WAIT_DASH1;
111  else
112  /*
113  * Not characters and not whitespace.
114  * Must be something like "3 < 4".
115  */
116  TOKEN_CB(PXML_TEXT, ST_TEXT, 1);/* Flush as data */
117  break;
118  case ST_TAG_BODY:
119  switch(C) {
120  case RANGLE:
121  /* End of the tag */
123  break;
124  case LANGLE:
125  /*
126  * The previous tag wasn't completed, but still
127  * recognized as valid. (Mozilla-compatible)
128  */
130  break;
131  case CEQUAL:
132  state = ST_TAG_QUOTE_WAIT;
133  break;
134  }
135  break;
136  case ST_TAG_QUOTE_WAIT:
137  /*
138  * State after the equal sign ("=") in the tag.
139  */
140  switch(C) {
141  case CQUOTE:
142  state = ST_TAG_QUOTED_STRING;
143  break;
144  case RANGLE:
145  /* End of the tag */
147  break;
148  default:
149  if(!WHITESPACE(C))
150  /* Unquoted string value */
151  state = ST_TAG_UNQUOTED_STRING;
152  }
153  break;
155  /*
156  * Tag attribute's string value in quotes.
157  */
158  if(C == CQUOTE) {
159  /* Return back to the tag state */
160  state = ST_TAG_BODY;
161  }
162  break;
164  if(C == RANGLE) {
165  /* End of the tag */
167  } else if(WHITESPACE(C)) {
168  /* Return back to the tag state */
169  state = ST_TAG_BODY;
170  }
171  break;
173  if(C == CDASH) {
174  state = ST_COMMENT_WAIT_DASH2;
175  } else {
176  /* Some ordinary tag. */
177  state = ST_TAG_BODY;
178  }
179  break;
181  if(C == CDASH) {
182  /* Seen "<--" */
183  state = ST_COMMENT;
184  } else {
185  /* Some ordinary tag */
186  state = ST_TAG_BODY;
187  }
188  break;
189  case ST_COMMENT:
190  if(C == CDASH) {
191  state = ST_COMMENT_CLO_DASH2;
192  }
193  break;
195  if(C == CDASH) {
196  state = ST_COMMENT_CLO_RT;
197  } else {
198  /* This is not an end of a comment */
199  state = ST_COMMENT;
200  }
201  break;
202  case ST_COMMENT_CLO_RT:
203  if(C == RANGLE) {
205  } else if(C == CDASH) {
206  /* Maintain current state, still waiting for '>' */
207  } else {
208  state = ST_COMMENT;
209  }
210  break;
211  } /* switch(*ptr) */
212  } /* for() */
213 
214  /*
215  * Flush the partially processed chunk, state permitting.
216  */
217  if(p - chunk_start) {
218  switch (state) {
219  case ST_COMMENT:
220  TOKEN_CB(PXML_COMMENT, state, 0);
221  break;
222  case ST_TEXT:
223  TOKEN_CB(PXML_TEXT, state, 0);
224  break;
225  default: break; /* a no-op */
226  }
227  }
228 
229 finish:
230  *stateContext = (int)state;
231  return chunk_start - (const char *)xmlbuf;
232 }
233 
#define CQUOTE
Definition: xer_support.c:51
#define TOKEN_CB_FINAL(_type, _ns, _current_too)
Definition: xer_support.c:82
#define ALPHA(c)
Definition: xer_support.c:47
#define CDASH
Definition: xer_support.c:52
pxml_chunk_type_e
Definition: xer_support.h:18
ssize_t pxml_parse(int *stateContext, const void *xmlbuf, size_t size, pxml_callback_f *cb, void *key)
Definition: xer_support.c:88
static int _charclass[256]
Definition: xer_support.c:35
int() pxml_callback_f(pxml_chunk_type_e _type, const void *_chunk_data, size_t _chunk_size, void *_key)
Definition: xer_support.h:38
#define LANGLE
Definition: xer_support.c:54
static pxml_chunk_type_e final_chunk_type[]
Definition: xer_support.c:25
#define RANGLE
Definition: xer_support.c:56
#define CEQUAL
Definition: xer_support.c:55
#define CSLASH
Definition: xer_support.c:53
#define EXCLAM
Definition: xer_support.c:50
#define TOKEN_CB(_type, _ns, _current_too)
Definition: xer_support.c:79
#define WHITESPACE(c)
Definition: xer_support.c:45
pstate_e
Definition: xer_support.c:11