8 #include <ecoli_malloc.h>
10 #include <ecoli_test.h>
11 #include <ecoli_strvec.h>
13 #include <ecoli_tk_many.h>
14 #include <ecoli_tk_or.h>
15 #include <ecoli_tk_str.h>
16 #include <ecoli_tk_int.h>
17 #include <ecoli_tk_re_lex.h>
19 struct regexp_pattern {
28 struct regexp_pattern *table;
/*
 * Split a string into tokens using a table of compiled regular expressions.
 *
 * table:     array of pattern entries; each entry's compiled regex (.r) and
 *            keep flag (.keep) are read here.
 * table_len: number of entries in table.
 * str:       input string to tokenize.
 *
 * Matched substrings are appended to a vector created with ec_strvec_new().
 *
 * NOTE(review): this excerpt does not show every line of the function (local
 * declarations, the enclosing loop over the input, the return statements and
 * the branch bodies are missing). The comments below describe only the
 * statements that are visible; hedged remarks must be confirmed against the
 * full source.
 */
32 static struct ec_strvec *
33 tokenize(struct regexp_pattern *table, size_t table_len, const char *str)
35 struct ec_strvec *strvec = NULL;
/* Vector that receives the matched substrings. */
47 strvec = ec_strvec_new();
/*
 * Try each pattern, in table order, on the text starting at dup[off].
 * dup is presumably a writable copy of str (its setup is not visible here).
 */
53 for (i = 0; i < table_len; i++) {
/* nmatch is 1, so pos describes the whole match, relative to &dup[off]. */
54 ret = regexec(&table[i].r, &dup[off], 1, &pos, 0);
/*
 * This condition is true when the match does not start exactly at the
 * current offset, or when it is empty. The branch body is not visible;
 * presumably such a match is ignored and the next pattern is tried.
 */
57 if (pos.rm_so != 0 || pos.rm_eo == 0) {
/*
 * Patterns added with keep == 0 are special-cased here. The branch body is
 * not visible; presumably the matched text is consumed without being stored
 * in the vector -- TODO confirm.
 */
62 if (table[i].keep == 0)
/*
 * Temporarily NUL-terminate the match in place so that &dup[off] is a C
 * string holding only the matched text. The overwritten character is saved
 * in c and restored once the token has been logged and added.
 */
65 c = dup[pos.rm_eo + off];
66 dup[pos.rm_eo + off] = '\0';
67 ec_log(EC_LOG_DEBUG, "re_lex match <%s>\n", &dup[off]);
/* A negative return from ec_strvec_add() is treated as a failure. */
68 if (ec_strvec_add(strvec, &dup[off]) < 0)
71 dup[pos.rm_eo + off] = c;
/* Releases the vector; presumably part of the error path (label not visible). */
86 ec_strvec_free(strvec);
/*
 * Parse callback of the "re_lex" token (installed as .parse in
 * ec_tk_re_lex_ops).
 *
 * gen_tk: the token, cast to struct ec_tk_re_lex to reach its regexp table,
 *         table length and child token.
 * strvec: input string vector; only its first element is tokenized.
 *
 * The first input string is split with tokenize() and the resulting tokens
 * are parsed by the child token. On success the child result is attached to
 * the new parsed token and the match is set from the first input string.
 *
 * NOTE(review): return statements, labels and some branch bodies are not
 * visible in this excerpt; hedged remarks below must be confirmed against
 * the full source.
 */
90 static struct ec_parsed_tk *ec_tk_re_lex_parse(const struct ec_tk *gen_tk,
91 const struct ec_strvec *strvec)
93 struct ec_tk_re_lex *tk = (struct ec_tk_re_lex *)gen_tk;
94 struct ec_strvec *new_vec = NULL, *match_strvec;
95 struct ec_parsed_tk *parsed_tk = NULL, *child_parsed_tk;
/* Result object for this token. */
98 parsed_tk = ec_parsed_tk_new();
99 if (parsed_tk == NULL)
/* Empty input vector: handled separately (branch body not visible). */
102 if (ec_strvec_len(strvec) == 0)
/* Lex the first input string into a new vector of tokens. */
105 str = ec_strvec_val(strvec, 0);
106 new_vec = tokenize(tk->table, tk->len, str);
/* Let the child token parse the generated tokens. */
110 child_parsed_tk = ec_tk_parse_tokens(tk->child, new_vec);
111 if (child_parsed_tk == NULL)
/*
 * The child must match and its parsed length must equal the number of
 * generated tokens; otherwise both temporaries are released. The rest of
 * this branch is not visible -- presumably it returns parsed_tk without a
 * match.
 */
114 if (!ec_parsed_tk_matches(child_parsed_tk) ||
115 ec_parsed_tk_len(child_parsed_tk) !=
116 ec_strvec_len(new_vec)) {
117 ec_strvec_free(new_vec);
118 ec_parsed_tk_free(child_parsed_tk);
/* The generated tokens are no longer needed once the child has parsed them. */
121 ec_strvec_free(new_vec);
/*
 * Attach the child result, then record the match using
 * ec_strvec_ndup(strvec, 0, 1) -- presumably a copy of the first input
 * string only.
 */
124 ec_parsed_tk_add_child(parsed_tk, child_parsed_tk);
125 match_strvec = ec_strvec_ndup(strvec, 0, 1);
126 if (match_strvec == NULL)
128 ec_parsed_tk_set_match(parsed_tk, gen_tk, match_strvec);
/*
 * Cleanup of the temporaries; presumably the failure path (label not
 * visible).
 * NOTE(review): new_vec is also freed above; whether the pointer is reset
 * to NULL in between is not visible here -- confirm there is no double free
 * when ec_strvec_ndup() fails.
 */
133 ec_strvec_free(new_vec);
134 ec_parsed_tk_free(parsed_tk);
/*
 * free_priv callback of the "re_lex" token: releases what the token owns.
 *
 * For every entry of the pattern table, the stored pattern string is freed
 * with ec_free() and the compiled regex is released with regfree(). The
 * child token is then freed with ec_tk_free().
 *
 * NOTE(review): lines between the loop and the child release are not
 * visible in this excerpt (e.g. whether the table array itself is freed).
 */
139 static void ec_tk_re_lex_free_priv(struct ec_tk *gen_tk)
141 struct ec_tk_re_lex *tk = (struct ec_tk_re_lex *)gen_tk;
144 for (i = 0; i < tk->len; i++) {
145 ec_free(tk->table[i].pattern);
146 regfree(&tk->table[i].r);
150 ec_tk_free(tk->child);
/*
 * Operations table for the "re_lex" token type: parsing and private-data
 * release are provided; the completion callback is commented out (marked
 * XXX below).
 */
153 static struct ec_tk_ops ec_tk_re_lex_ops = {
154 .typename = "re_lex",
155 .parse = ec_tk_re_lex_parse,
156 //.complete = ec_tk_re_lex_complete, //XXX
157 .free_priv = ec_tk_re_lex_free_priv,
/*
 * Append a regular expression to the pattern table of a "re_lex" token.
 *
 * gen_tk:  the token, cast to struct ec_tk_re_lex.
 * pattern: regular expression, compiled with REG_EXTENDED; a duplicate of
 *          the string is stored in the new table entry.
 * keep:    flag stored in the new entry; tokenize() tests it against 0.
 *
 * NOTE(review): error handling, the update of tk->table / tk->len and the
 * return statements are not visible in this excerpt. The test case in this
 * file ORs the return values together, so presumably 0 means success.
 */
160 int ec_tk_re_lex_add(struct ec_tk *gen_tk, const char *pattern, int keep)
162 struct ec_tk_re_lex *tk = (struct ec_tk_re_lex *)gen_tk;
163 struct regexp_pattern *table;
165 char *pat_dup = NULL;
/* Private copy of the pattern string, stored in the table entry below. */
168 pat_dup = ec_strdup(pattern);
/*
 * Grow the table by one entry. The result goes to a local pointer first,
 * not directly to tk->table.
 */
173 table = ec_realloc(tk->table, sizeof(*table) * (tk->len + 1));
/*
 * Compile into the new last slot. A failure is logged with the regcomp()
 * return code; REG_ESPACE (regcomp ran out of memory) is tested separately,
 * with a branch body that is not visible here.
 */
177 ret = regcomp(&table[tk->len].r, pattern, REG_EXTENDED);
180 "Regular expression <%s> compilation failed: %d\n",
182 if (ret == REG_ESPACE)
/* Fill in the rest of the new entry. */
190 table[tk->len].pattern = pat_dup;
191 table[tk->len].keep = keep;
/*
 * Create a "re_lex" token.
 *
 * id:    identifier passed to ec_tk_new().
 * child: token that parses the lexed tokens; presumably stored as
 *        tk->child (the assignment is not visible in this excerpt).
 *
 * The token is allocated with ec_tk_new() using ec_tk_re_lex_ops.
 */
202 struct ec_tk *ec_tk_re_lex(const char *id, struct ec_tk *child)
204 struct ec_tk_re_lex *tk = NULL;
209 tk = (struct ec_tk_re_lex *)ec_tk_new(id, &ec_tk_re_lex_ops,
/*
 * Unit test for the "re_lex" token.
 *
 * Builds a re_lex token whose child grammar contains the strings "foo" and
 * "bar" and an integer token (the enclosing combinators are not visible in
 * this excerpt), registers the lexer patterns, then checks several inputs
 * with EC_TEST_CHECK_TK_PARSE.
 */
222 static int ec_tk_re_lex_testcase(void)
227 tk = ec_tk_re_lex(NULL,
230 ec_tk_str(NULL, "foo"),
231 ec_tk_str(NULL, "bar"),
232 ec_tk_int(NULL, 0, 1000, 0)
237 ec_log(EC_LOG_ERR, "cannot create tk\n");
/*
 * Lexer patterns: letters, digits, '=', '-', '+' are added with keep = 1;
 * runs of spaces are added with keep = 0. Every pattern is explicitly
 * anchored with '^'. Return values are OR-ed so any failure is detected.
 */
241 /* XXX add ^ automatically ? */
242 ret |= ec_tk_re_lex_add(tk, "^[a-zA-Z]+", 1);
243 ret |= ec_tk_re_lex_add(tk, "^[0-9]+", 1);
244 ret |= ec_tk_re_lex_add(tk, "^=", 1);
245 ret |= ec_tk_re_lex_add(tk, "^-", 1);
246 ret |= ec_tk_re_lex_add(tk, "^\\+", 1);
247 ret |= ec_tk_re_lex_add(tk, "^[ ]+", 0);
249 ec_log(EC_LOG_ERR, "cannot add regexp to token\n");
/*
 * Parse checks. The second macro argument is presumably the expected match
 * length, with -1 meaning "no match" -- TODO confirm against the
 * EC_TEST_CHECK_TK_PARSE definition.
 */
254 ret |= EC_TEST_CHECK_TK_PARSE(tk, 1, " foo bar 324 bar234");
255 ret |= EC_TEST_CHECK_TK_PARSE(tk, 1, "foo bar324");
256 ret |= EC_TEST_CHECK_TK_PARSE(tk, 1, "");
257 ret |= EC_TEST_CHECK_TK_PARSE(tk, -1, "foobar");
/*
 * Test descriptor for the "re_lex" token; its .test callback is
 * ec_tk_re_lex_testcase. It is handed to EC_REGISTER_TEST below.
 */
264 static struct ec_test ec_tk_re_lex_test = {
266 .test = ec_tk_re_lex_testcase,
269 EC_REGISTER_TEST(ec_tk_re_lex_test);