726c6cf75423ddfacf2f958d190ceb7be7e400cb
[protos/libecoli.git] / src / ecoli_node_re_lex.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016, Olivier MATZ <zer0@droids-corp.org>
3  */
4
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <stdbool.h>
8 #include <string.h>
9 #include <regex.h>
10 #include <errno.h>
11
12 #include <ecoli_malloc.h>
13 #include <ecoli_log.h>
14 #include <ecoli_test.h>
15 #include <ecoli_strvec.h>
16 #include <ecoli_dict.h>
17 #include <ecoli_node.h>
18 #include <ecoli_complete.h>
19 #include <ecoli_parse.h>
20 #include <ecoli_config.h>
21 #include <ecoli_node_many.h>
22 #include <ecoli_node_or.h>
23 #include <ecoli_node_str.h>
24 #include <ecoli_node_int.h>
25 #include <ecoli_node_re_lex.h>
26
/*
 * Register the "node_re_lex" log type.
 * NOTE(review): presumably this is the log type implicitly used by the
 * EC_LOG() calls in this file -- confirm against ecoli_log.h.
 */
EC_LOG_TYPE_REGISTER(node_re_lex);
28
/* One entry of the lexer table: a compiled regular expression and its options. */
struct regexp_pattern {
        /* private copy of the regular expression source string */
        char *pattern;
        /* attribute name attached to the tokens produced, or NULL for none */
        char *attr_name;
        /* compiled form of the pattern, released with regfree() */
        regex_t r;
        /* true: matched text becomes a token; false: it is skipped */
        bool keep;
};
35
/* Private data of a re_lex node. */
struct ec_node_re_lex {
        /* node that parses the token vector produced by the lexer */
        struct ec_node *child;
        /* array of patterns, tried in order when tokenizing */
        struct regexp_pattern *table;
        /* number of entries in the table */
        size_t len;
};
41
42 static struct ec_strvec *
43 tokenize(struct regexp_pattern *table, size_t table_len, const char *str)
44 {
45         struct ec_strvec *strvec = NULL;
46         struct ec_dict *attrs = NULL;
47         char *dup = NULL;
48         char c;
49         size_t len, off = 0;
50         size_t i;
51         int ret;
52         regmatch_t pos;
53
54         dup = ec_strdup(str);
55         if (dup == NULL)
56                 goto fail;
57
58         strvec = ec_strvec();
59         if (strvec == NULL)
60                 goto fail;
61
62         len = strlen(dup);
63         while (off < len) {
64                 for (i = 0; i < table_len; i++) {
65                         ret = regexec(&table[i].r, &dup[off], 1, &pos, 0);
66                         if (ret != 0)
67                                 continue;
68                         if (pos.rm_so != 0 || pos.rm_eo == 0) {
69                                 ret = -1;
70                                 continue;
71                         }
72
73                         if (table[i].keep == 0)
74                                 break;
75
76                         c = dup[pos.rm_eo + off];
77                         dup[pos.rm_eo + off] = '\0';
78                         EC_LOG(EC_LOG_DEBUG, "re_lex match <%s>\n", &dup[off]);
79                         if (ec_strvec_add(strvec, &dup[off]) < 0)
80                                 goto fail;
81
82                         if (table[i].attr_name != NULL) {
83                                 attrs = ec_dict();
84                                 if (attrs == NULL)
85                                         goto fail;
86                                 if (ec_dict_set(attrs, table[i].attr_name,
87                                                 NULL, NULL) < 0)
88                                         goto fail;
89                                 if (ec_strvec_set_attrs(strvec,
90                                                 ec_strvec_len(strvec) - 1,
91                                                 attrs) < 0) {
92                                         attrs = NULL;
93                                         goto fail;
94                                 }
95                                 attrs = NULL;
96                         }
97
98                         dup[pos.rm_eo + off] = c;
99                         break;
100                 }
101
102                 if (ret != 0)
103                         goto fail;
104
105                 off += pos.rm_eo;
106         }
107
108         ec_free(dup);
109         return strvec;
110
111 fail:
112         ec_free(dup);
113         ec_strvec_free(strvec);
114         return NULL;
115 }
116
117 static int
118 ec_node_re_lex_parse(const struct ec_node *node,
119                 struct ec_pnode *pstate,
120                 const struct ec_strvec *strvec)
121 {
122         struct ec_node_re_lex *priv = ec_node_priv(node);
123         struct ec_strvec *new_vec = NULL;
124         struct ec_pnode *child_parse;
125         const char *str;
126         int ret;
127
128         if (priv->child == NULL) {
129                 errno = EINVAL;
130                 goto fail;
131         }
132
133         if (ec_strvec_len(strvec) == 0) {
134                 new_vec = ec_strvec();
135         } else {
136                 str = ec_strvec_val(strvec, 0);
137                 new_vec = tokenize(priv->table, priv->len, str);
138         }
139         if (new_vec == NULL)
140                 goto fail;
141
142         ret = ec_parse_child(priv->child, pstate, new_vec);
143         if (ret < 0)
144                 goto fail;
145
146         if ((unsigned)ret == ec_strvec_len(new_vec)) {
147                 ret = 1;
148         } else if (ret != EC_PARSE_NOMATCH) {
149                 child_parse = ec_pnode_get_last_child(pstate);
150                 ec_pnode_unlink_child(pstate, child_parse);
151                 ec_pnode_free(child_parse);
152                 ret = EC_PARSE_NOMATCH;
153         }
154
155         ec_strvec_free(new_vec);
156         new_vec = NULL;
157
158         return ret;
159
160  fail:
161         ec_strvec_free(new_vec);
162         return -1;
163 }
164
165 static void ec_node_re_lex_free_priv(struct ec_node *node)
166 {
167         struct ec_node_re_lex *priv = ec_node_priv(node);
168         unsigned int i;
169
170         ec_node_free(priv->child);
171         for (i = 0; i < priv->len; i++) {
172                 ec_free(priv->table[i].pattern);
173                 ec_free(priv->table[i].attr_name);
174                 regfree(&priv->table[i].r);
175         }
176
177         ec_free(priv->table);
178 }
179
180 static size_t
181 ec_node_re_lex_get_children_count(const struct ec_node *node)
182 {
183         struct ec_node_re_lex *priv = ec_node_priv(node);
184
185         if (priv->child)
186                 return 1;
187         return 0;
188 }
189
190 static int
191 ec_node_re_lex_get_child(const struct ec_node *node, size_t i,
192                         struct ec_node **child, unsigned int *refs)
193 {
194         struct ec_node_re_lex *priv = ec_node_priv(node);
195
196         if (i >= 1)
197                 return -1;
198
199         *child = priv->child;
200         *refs = 2;
201         return 0;
202 }
203
204 static const struct ec_config_schema ec_node_re_lex_dict[] = {
205         {
206                 .key = "pattern",
207                 .desc = "The pattern to match.",
208                 .type = EC_CONFIG_TYPE_STRING,
209         },
210         {
211                 .key = "keep",
212                 .desc = "Whether to keep or drop the string matching "
213                 "the regular expression.",
214                 .type = EC_CONFIG_TYPE_BOOL,
215         },
216         {
217                 .key = "attr",
218                 .desc = "The optional attribute name to attach.",
219                 .type = EC_CONFIG_TYPE_STRING,
220         },
221         {
222                 .type = EC_CONFIG_TYPE_NONE,
223         },
224 };
225
226 static const struct ec_config_schema ec_node_re_lex_elt[] = {
227         {
228                 .desc = "A pattern element.",
229                 .type = EC_CONFIG_TYPE_DICT,
230                 .subschema = ec_node_re_lex_dict,
231         },
232         {
233                 .type = EC_CONFIG_TYPE_NONE,
234         },
235 };
236
237 static const struct ec_config_schema ec_node_re_lex_schema[] = {
238         {
239                 .key = "patterns",
240                 .desc = "The list of patterns elements.",
241                 .type = EC_CONFIG_TYPE_LIST,
242                 .subschema = ec_node_re_lex_elt,
243         },
244         {
245                 .key = "child",
246                 .desc = "The child node.",
247                 .type = EC_CONFIG_TYPE_NODE,
248         },
249         {
250                 .type = EC_CONFIG_TYPE_NONE,
251         },
252 };
253
254 static int ec_node_re_lex_set_config(struct ec_node *node,
255                                 const struct ec_config *config)
256 {
257         struct ec_node_re_lex *priv = ec_node_priv(node);
258         struct regexp_pattern *table = NULL;
259         const struct ec_config *patterns, *child, *elt, *pattern, *keep, *attr;
260         char *pattern_str = NULL, *attr_name = NULL;
261         ssize_t i, n = 0;
262         int ret;
263
264         child = ec_config_dict_get(config, "child");
265         if (child == NULL)
266                 goto fail;
267         if (ec_config_get_type(child) != EC_CONFIG_TYPE_NODE) {
268                 errno = EINVAL;
269                 goto fail;
270         }
271
272         patterns = ec_config_dict_get(config, "patterns");
273         if (patterns != NULL) {
274                 n = ec_config_count(patterns);
275                 if (n < 0)
276                         goto fail;
277
278                 table = ec_calloc(n, sizeof(*table));
279                 if (table == NULL)
280                         goto fail;
281
282                 n = 0;
283                 TAILQ_FOREACH(elt, &patterns->list, next) {
284                         if (ec_config_get_type(elt) != EC_CONFIG_TYPE_DICT) {
285                                 errno = EINVAL;
286                                 goto fail;
287                         }
288                         pattern = ec_config_dict_get(elt, "pattern");
289                         if (pattern == NULL) {
290                                 errno = EINVAL;
291                                 goto fail;
292                         }
293                         if (ec_config_get_type(pattern) != EC_CONFIG_TYPE_STRING) {
294                                 errno = EINVAL;
295                                 goto fail;
296                         }
297                         keep = ec_config_dict_get(elt, "keep");
298                         if (keep == NULL) {
299                                 errno = EINVAL;
300                                 goto fail;
301                         }
302                         if (ec_config_get_type(keep) != EC_CONFIG_TYPE_BOOL) {
303                                 errno = EINVAL;
304                                 goto fail;
305                         }
306                         attr = ec_config_dict_get(elt, "attr");
307                         if (attr != NULL && ec_config_get_type(attr) !=
308                                         EC_CONFIG_TYPE_STRING) {
309                                 errno = EINVAL;
310                                 goto fail;
311                         }
312                         pattern_str = ec_strdup(pattern->string);
313                         if (pattern_str == NULL)
314                                 goto fail;
315                         if (attr != NULL && attr->string != NULL) {
316                                 attr_name = ec_strdup(attr->string);
317                                 if (attr_name == NULL)
318                                         goto fail;
319                         }
320
321                         ret = regcomp(&table[n].r, pattern_str, REG_EXTENDED);
322                         if (ret != 0) {
323                                 EC_LOG(EC_LOG_ERR,
324                                         "Regular expression <%s> compilation failed: %d\n",
325                                         pattern_str, ret);
326                                 if (ret == REG_ESPACE)
327                                         errno = ENOMEM;
328                                 else
329                                         errno = EINVAL;
330                                 goto fail;
331                         }
332                         table[n].pattern = pattern_str;
333                         table[n].keep = keep->boolean;
334                         table[n].attr_name = attr_name;
335                         pattern_str = NULL;
336                         attr_name = NULL;
337
338                         n++;
339                 }
340         }
341
342         if (priv->child != NULL)
343                 ec_node_free(priv->child);
344         priv->child = ec_node_clone(child->node);
345         for (i = 0; i < (ssize_t)priv->len; i++) {
346                 ec_free(priv->table[i].pattern);
347                 ec_free(priv->table[i].attr_name);
348                 regfree(&priv->table[i].r);
349         }
350         ec_free(priv->table);
351         priv->table = table;
352         priv->len = n;
353
354         return 0;
355
356 fail:
357         if (table != NULL) {
358                 for (i = 0; i < n; i++) {
359                         if (table[i].pattern != NULL) {
360                                 ec_free(table[i].pattern);
361                                 regfree(&table[i].r);
362                         }
363                 }
364         }
365         ec_free(table);
366         ec_free(pattern_str);
367         return -1;
368 }
369
370 static struct ec_node_type ec_node_re_lex_type = {
371         .name = "re_lex",
372         .schema = ec_node_re_lex_schema,
373         .set_config = ec_node_re_lex_set_config,
374         .parse = ec_node_re_lex_parse,
375         .size = sizeof(struct ec_node_re_lex),
376         .free_priv = ec_node_re_lex_free_priv,
377         .get_children_count = ec_node_re_lex_get_children_count,
378         .get_child = ec_node_re_lex_get_child,
379 };
380
381 EC_NODE_TYPE_REGISTER(ec_node_re_lex_type);
382
383 int ec_node_re_lex_add(struct ec_node *node, const char *pattern, int keep,
384         const char *attr_name)
385 {
386         const struct ec_config *cur_config = NULL;
387         struct ec_config *config = NULL, *patterns = NULL, *elt = NULL;
388         int ret;
389
390         if (ec_node_check_type(node, &ec_node_re_lex_type) < 0)
391                 goto fail;
392
393         elt = ec_config_dict();
394         if (elt == NULL)
395                 goto fail;
396         if (ec_config_dict_set(elt, "pattern", ec_config_string(pattern)) < 0)
397                 goto fail;
398         if (ec_config_dict_set(elt, "keep", ec_config_bool(keep)) < 0)
399                 goto fail;
400         if (attr_name != NULL) {
401                 if (ec_config_dict_set(elt, "attr",
402                                         ec_config_string(attr_name)) < 0)
403                         goto fail;
404         }
405
406         cur_config = ec_node_get_config(node);
407         if (cur_config == NULL)
408                 config = ec_config_dict();
409         else
410                 config = ec_config_dup(cur_config);
411         if (config == NULL)
412                 goto fail;
413
414         patterns = ec_config_dict_get(config, "patterns");
415         if (patterns == NULL) {
416                 patterns = ec_config_list();
417                 if (patterns == NULL)
418                         goto fail;
419
420                 if (ec_config_dict_set(config, "patterns", patterns) < 0)
421                         goto fail; /* patterns list is freed on error */
422         }
423
424         if (ec_config_list_add(patterns, elt) < 0) {
425                 elt = NULL;
426                 goto fail;
427         }
428         elt = NULL;
429
430         ret = ec_node_set_config(node, config);
431         config = NULL; /* freed */
432         if (ret < 0)
433                 goto fail;
434
435         return 0;
436
437 fail:
438         ec_config_free(config);
439         ec_config_free(elt);
440         return -1;
441 }
442
443 static int
444 ec_node_re_lex_set_child(struct ec_node *node, struct ec_node *child)
445 {
446         const struct ec_config *cur_config = NULL;
447         struct ec_config *config = NULL;
448         int ret;
449
450         if (ec_node_check_type(node, &ec_node_re_lex_type) < 0)
451                 goto fail;
452
453         cur_config = ec_node_get_config(node);
454         if (cur_config == NULL)
455                 config = ec_config_dict();
456         else
457                 config = ec_config_dup(cur_config);
458         if (config == NULL)
459                 goto fail;
460
461         if (ec_config_dict_set(config, "child", ec_config_node(child)) < 0) {
462                 child = NULL; /* freed */
463                 goto fail;
464         }
465         child = NULL; /* freed */
466
467         ret = ec_node_set_config(node, config);
468         config = NULL; /* freed */
469         if (ret < 0)
470                 goto fail;
471
472         return 0;
473
474 fail:
475         ec_config_free(config);
476         ec_node_free(child);
477         return -1;
478 }
479
480 struct ec_node *ec_node_re_lex(const char *id, struct ec_node *child)
481 {
482         struct ec_node *node = NULL;
483
484         if (child == NULL)
485                 return NULL;
486
487         node = ec_node_from_type(&ec_node_re_lex_type, id);
488         if (node == NULL)
489                 goto fail;
490
491         if (ec_node_re_lex_set_child(node, child) < 0) {
492                 child = NULL; /* freed */
493                 goto fail;
494         }
495
496         return node;
497
498 fail:
499         ec_node_free(node);
500         ec_node_free(child);
501         return NULL;
502 }
503
504 /* LCOV_EXCL_START */
505 static int ec_node_re_lex_testcase(void)
506 {
507         struct ec_node *node;
508         int ret, testres = 0;
509
510         node = ec_node_re_lex(EC_NO_ID,
511                 ec_node_many(EC_NO_ID,
512                         EC_NODE_OR(EC_NO_ID,
513                                 ec_node_str(EC_NO_ID, "foo"),
514                                 ec_node_str(EC_NO_ID, "bar"),
515                                 ec_node_int(EC_NO_ID, 0, 1000, 0)
516                         ), 0, 0
517                 )
518         );
519         if (node == NULL) {
520                 EC_LOG(EC_LOG_ERR, "cannot create node\n");
521                 return -1;
522         }
523
524         ret = ec_node_re_lex_add(node, "[a-zA-Z]+", 1, NULL);
525         testres |= EC_TEST_CHECK(ret == 0, "cannot add regexp");
526         ret = ec_node_re_lex_add(node, "[0-9]+", 1, NULL);
527         testres |= EC_TEST_CHECK(ret == 0, "cannot add regexp");
528         ret = ec_node_re_lex_add(node, "=", 1, NULL);
529         testres |= EC_TEST_CHECK(ret == 0, "cannot add regexp");
530         ret = ec_node_re_lex_add(node, "-", 1, NULL);
531         testres |= EC_TEST_CHECK(ret == 0, "cannot add regexp");
532         ret = ec_node_re_lex_add(node, "\\+", 1, NULL);
533         testres |= EC_TEST_CHECK(ret == 0, "cannot add regexp");
534         ret = ec_node_re_lex_add(node, "[       ]+", 0, NULL);
535         testres |= EC_TEST_CHECK(ret == 0, "cannot add regexp");
536         if (ret != 0) {
537                 EC_LOG(EC_LOG_ERR, "cannot add regexp to node\n");
538                 ec_node_free(node);
539                 return -1;
540         }
541
542         testres |= EC_TEST_CHECK_PARSE(node, 1, "  foo bar  324 bar234");
543         testres |= EC_TEST_CHECK_PARSE(node, 1, "foo bar324");
544         testres |= EC_TEST_CHECK_PARSE(node, 1, "");
545         testres |= EC_TEST_CHECK_PARSE(node, -1, "foobar");
546
547         /* no completion */
548         testres |= EC_TEST_CHECK_COMPLETE(node,
549                 "", EC_VA_END,
550                 EC_VA_END);
551
552         ec_node_free(node);
553
554         return testres;
555 }
556 /* LCOV_EXCL_STOP */
557
558 static struct ec_test ec_node_re_lex_test = {
559         .name = "node_re_lex",
560         .test = ec_node_re_lex_testcase,
561 };
562
563 EC_TEST_REGISTER(ec_node_re_lex_test);