60ba14536843c9a1a68d3f62a3435d5a8ca57ce6
[protos/libecoli.git] / lib / ecoli_tk_re_lex.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <stdbool.h>
4 #include <string.h>
5 #include <regex.h>
6 #include <errno.h>
7
8 #include <ecoli_malloc.h>
9 #include <ecoli_log.h>
10 #include <ecoli_test.h>
11 #include <ecoli_strvec.h>
12 #include <ecoli_tk.h>
13 #include <ecoli_tk_many.h>
14 #include <ecoli_tk_or.h>
15 #include <ecoli_tk_str.h>
16 #include <ecoli_tk_int.h>
17 #include <ecoli_tk_re_lex.h>
18
/*
 * One entry of the lexer table: a regular expression together with the
 * policy to apply to the text it matches.
 */
struct regexp_pattern {
	/* private copy of the pattern source string, released by free_priv */
	char *pattern;
	/* compiled form of the pattern (built with REG_EXTENDED) */
	regex_t r;
	/* true: matched text becomes a token; false: it is skipped */
	bool keep;
};
24
25 struct ec_tk_re_lex {
26         struct ec_tk gen;
27         struct ec_tk *child;
28         struct regexp_pattern *table;
29         size_t len;
30 };
31
32 static struct ec_strvec *
33 tokenize(struct regexp_pattern *table, size_t table_len, const char *str)
34 {
35         struct ec_strvec *strvec = NULL;
36         char *dup = NULL;
37         char c;
38         size_t len, off = 0;
39         size_t i;
40         int ret;
41         regmatch_t pos;
42
43         dup = ec_strdup(str);
44         if (dup == NULL)
45                 goto fail;
46
47         strvec = ec_strvec_new();
48         if (strvec == NULL)
49                 goto fail;
50
51         len = strlen(dup);
52         while (off < len) {
53                 for (i = 0; i < table_len; i++) {
54                         ret = regexec(&table[i].r, &dup[off], 1, &pos, 0);
55                         if (ret != 0)
56                                 continue;
57                         if (pos.rm_so != 0 || pos.rm_eo == 0) {
58                                 ret = -1;
59                                 continue;
60                         }
61
62                         if (table[i].keep == 0)
63                                 break;
64
65                         c = dup[pos.rm_eo + off];
66                         dup[pos.rm_eo + off] = '\0';
67                         ec_log(EC_LOG_DEBUG, "re_lex match <%s>\n", &dup[off]);
68                         if (ec_strvec_add(strvec, &dup[off]) < 0)
69                                 goto fail;
70
71                         dup[pos.rm_eo + off] = c;
72                         break;
73                 }
74
75                 if (ret != 0)
76                         goto fail;
77
78                 off += pos.rm_eo;
79         }
80
81         ec_free(dup);
82         return strvec;
83
84 fail:
85         ec_free(dup);
86         ec_strvec_free(strvec);
87         return NULL;
88 }
89
90 static struct ec_parsed_tk *ec_tk_re_lex_parse(const struct ec_tk *gen_tk,
91         const struct ec_strvec *strvec)
92 {
93         struct ec_tk_re_lex *tk = (struct ec_tk_re_lex *)gen_tk;
94         struct ec_strvec *new_vec = NULL, *match_strvec;
95         struct ec_parsed_tk *parsed_tk = NULL, *child_parsed_tk;
96         const char *str;
97
98         parsed_tk = ec_parsed_tk_new();
99         if (parsed_tk == NULL)
100                 return NULL;
101
102         if (ec_strvec_len(strvec) == 0)
103                 return parsed_tk;
104
105         str = ec_strvec_val(strvec, 0);
106         new_vec = tokenize(tk->table, tk->len, str);
107         if (new_vec == NULL)
108                 goto fail;
109
110         child_parsed_tk = ec_tk_parse_tokens(tk->child, new_vec);
111         if (child_parsed_tk == NULL)
112                 goto fail;
113
114         if (!ec_parsed_tk_matches(child_parsed_tk) ||
115                         ec_parsed_tk_len(child_parsed_tk) !=
116                                 ec_strvec_len(new_vec)) {
117                 ec_strvec_free(new_vec);
118                 ec_parsed_tk_free(child_parsed_tk);
119                 return parsed_tk;
120         }
121         ec_strvec_free(new_vec);
122         new_vec = NULL;
123
124         ec_parsed_tk_add_child(parsed_tk, child_parsed_tk);
125         match_strvec = ec_strvec_ndup(strvec, 0, 1);
126         if (match_strvec == NULL)
127                 goto fail;
128         ec_parsed_tk_set_match(parsed_tk, gen_tk, match_strvec);
129
130         return parsed_tk;
131
132  fail:
133         ec_strvec_free(new_vec);
134         ec_parsed_tk_free(parsed_tk);
135
136         return NULL;
137 }
138
139 static void ec_tk_re_lex_free_priv(struct ec_tk *gen_tk)
140 {
141         struct ec_tk_re_lex *tk = (struct ec_tk_re_lex *)gen_tk;
142         unsigned int i;
143
144         for (i = 0; i < tk->len; i++) {
145                 ec_free(tk->table[i].pattern);
146                 regfree(&tk->table[i].r);
147         }
148
149         ec_free(tk->table);
150         ec_tk_free(tk->child);
151 }
152
153 static struct ec_tk_ops ec_tk_re_lex_ops = {
154         .typename = "re_lex",
155         .parse = ec_tk_re_lex_parse,
156         //.complete = ec_tk_re_lex_complete, //XXX
157         .free_priv = ec_tk_re_lex_free_priv,
158 };
159
160 int ec_tk_re_lex_add(struct ec_tk *gen_tk, const char *pattern, int keep)
161 {
162         struct ec_tk_re_lex *tk = (struct ec_tk_re_lex *)gen_tk;
163         struct regexp_pattern *table;
164         int ret;
165         char *pat_dup = NULL;
166
167         ret = -ENOMEM;
168         pat_dup = ec_strdup(pattern);
169         if (pat_dup == NULL)
170                 goto fail;
171
172         ret = -ENOMEM;
173         table = ec_realloc(tk->table, sizeof(*table) * (tk->len + 1));
174         if (table == NULL)
175                 goto fail;
176
177         ret = regcomp(&table[tk->len].r, pattern, REG_EXTENDED);
178         if (ret != 0) {
179                 ec_log(EC_LOG_ERR,
180                         "Regular expression <%s> compilation failed: %d\n",
181                         pattern, ret);
182                 if (ret == REG_ESPACE)
183                         ret = -ENOMEM;
184                 else
185                         ret = -EINVAL;
186
187                 goto fail;
188         }
189
190         table[tk->len].pattern = pat_dup;
191         table[tk->len].keep = keep;
192         tk->len++;
193         tk->table = table;
194
195         return 0;
196
197 fail:
198         ec_free(pat_dup);
199         return ret;
200 }
201
202 struct ec_tk *ec_tk_re_lex(const char *id, struct ec_tk *child)
203 {
204         struct ec_tk_re_lex *tk = NULL;
205
206         if (child == NULL)
207                 return NULL;
208
209         tk = (struct ec_tk_re_lex *)ec_tk_new(id, &ec_tk_re_lex_ops,
210                 sizeof(*tk));
211         if (tk == NULL) {
212                 ec_tk_free(child);
213                 return NULL;
214         }
215
216         tk->child = child;
217
218         return &tk->gen;
219 }
220
221
222 static int ec_tk_re_lex_testcase(void)
223 {
224         struct ec_tk *tk;
225         int ret = 0;
226
227         tk = ec_tk_re_lex(NULL,
228                 ec_tk_many(NULL,
229                         EC_TK_OR(NULL,
230                                 ec_tk_str(NULL, "foo"),
231                                 ec_tk_str(NULL, "bar"),
232                                 ec_tk_int(NULL, 0, 1000, 0)
233                         ), 0, 0
234                 )
235         );
236         if (tk == NULL) {
237                 ec_log(EC_LOG_ERR, "cannot create tk\n");
238                 return -1;
239         }
240
241         /* XXX add ^ automatically ? */
242         ret |= ec_tk_re_lex_add(tk, "^[a-zA-Z]+", 1);
243         ret |= ec_tk_re_lex_add(tk, "^[0-9]+", 1);
244         ret |= ec_tk_re_lex_add(tk, "^=", 1);
245         ret |= ec_tk_re_lex_add(tk, "^-", 1);
246         ret |= ec_tk_re_lex_add(tk, "^\\+", 1);
247         ret |= ec_tk_re_lex_add(tk, "^[         ]+", 0);
248         if (ret != 0) {
249                 ec_log(EC_LOG_ERR, "cannot add regexp to token\n");
250                 ec_tk_free(tk);
251                 return -1;
252         }
253
254         ret |= EC_TEST_CHECK_TK_PARSE(tk, 1, "  foo bar  324 bar234");
255         ret |= EC_TEST_CHECK_TK_PARSE(tk, 1, "foo bar324");
256         ret |= EC_TEST_CHECK_TK_PARSE(tk, 1, "");
257         ret |= EC_TEST_CHECK_TK_PARSE(tk, -1, "foobar");
258
259         ec_tk_free(tk);
260
261         return ret;
262 }
263
264 static struct ec_test ec_tk_re_lex_test = {
265         .name = "tk_re_lex",
266         .test = ec_tk_re_lex_testcase,
267 };
268
269 EC_REGISTER_TEST(ec_tk_re_lex_test);