save
[protos/libecoli.git] / lib / ecoli_node_sh_lex.c
1 /*
2  * Copyright (c) 2016, Olivier MATZ <zer0@droids-corp.org>
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of the University of California, Berkeley nor the
13  *       names of its contributors may be used to endorse or promote products
14  *       derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <assert.h>
32 #include <stdarg.h>
33 #include <ctype.h>
34 #include <errno.h>
35
36 #include <ecoli_malloc.h>
37 #include <ecoli_log.h>
38 #include <ecoli_test.h>
39 #include <ecoli_strvec.h>
40 #include <ecoli_node.h>
41 #include <ecoli_parsed.h>
42 #include <ecoli_completed.h>
43 #include <ecoli_node_seq.h>
44 #include <ecoli_node_str.h>
45 #include <ecoli_node_option.h>
46 #include <ecoli_node_sh_lex.h>
47
48 struct ec_node_sh_lex {
49         struct ec_node gen;
50         struct ec_node *child;
51 };
52
/*
 * Count the blank characters (as classified by isblank()) found at the
 * beginning of str.
 *
 * Returns the number of leading blanks, which is 0 when str is empty or
 * starts with a non-blank character.
 */
static size_t eat_spaces(const char *str)
{
	size_t i = 0;

	/* the cast is required: giving a negative char value to a <ctype.h>
	 * function is undefined behavior */
	while (isblank((unsigned char)str[i]))
		i++;

	return i;
}
63
/*
 * Build an unquoted copy of a quoted string.
 *
 * str points to the opening quote and n is the number of input bytes to
 * consider. The opening quote is dropped, the sequences \<quote> and \\
 * are replaced by <quote> and \ respectively, and the copy stops at the
 * first unescaped closing quote.
 *
 * If no closing quote is found, *missing_quote (when missing_quote is
 * not NULL) is set to the quote character. In that case the function
 * fails with errno set to EINVAL unless allow_missing_quote is non-zero.
 *
 * Returns a string allocated with ec_malloc() that the caller must
 * release with ec_free(), or NULL on error (errno is set to ENOMEM,
 * EMSGSIZE or EINVAL).
 */
static char *unquote_str(const char *str, size_t n, int allow_missing_quote,
	char *missing_quote)
{
	const char quote = str[0];
	unsigned in = 1;	/* read index, starts after the opening quote */
	unsigned out = 0;	/* write index */
	int terminated = 0;
	char *buf;

	buf = ec_malloc(n);
	if (buf == NULL) {
		errno = ENOMEM;
		return NULL;
	}

	for (;;) {
		char c;

		if (in >= n || out >= n)
			break;
		c = str[in];
		if (c == '\0')
			break;

		/* escaped quote or escaped backslash: keep the second char */
		if (c == '\\' &&
		    (str[in + 1] == quote || str[in + 1] == '\\')) {
			buf[out++] = str[in + 1];
			in += 2;
			continue;
		}

		/* unescaped quote: end of the quoted section */
		if (c == quote) {
			terminated = 1;
			break;
		}

		buf[out++] = c;
		in++;
	}

	/* no room left for the terminating NUL (not expected to happen) */
	if (out >= n) {
		ec_free(buf);
		errno = EMSGSIZE;
		return NULL;
	}

	if (!terminated) {
		if (missing_quote != NULL)
			*missing_quote = quote;
		if (!allow_missing_quote) {
			ec_free(buf);
			errno = EINVAL;
			return NULL;
		}
	}

	buf[out] = '\0';
	return buf;
}
124
/*
 * Measure a quoted section. str must point to the opening quote; the
 * same character closes the section.
 *
 * The escape sequences understood by unquote_str() are stepped over as a
 * pair: a backslash followed by the quote character, and a backslash
 * followed by a backslash. This way a quote that follows an escaped
 * backslash (as in "a\\") is correctly seen as the closing quote.
 *
 * Returns the number of bytes of the section, both quotes included. If
 * the quote is never closed, the length of the whole remaining string is
 * returned; the missing quote is detected later, when unquoting.
 */
static size_t eat_quoted_str(const char *str)
{
	const char quote = str[0];
	size_t i = 1;	/* skip the opening quote */

	while (str[i] != '\0') {
		if (str[i] == '\\' &&
		    (str[i + 1] == quote || str[i + 1] == '\\')) {
			/* escaped quote or escaped backslash */
			i += 2;
			continue;
		}
		if (str[i] == quote)
			return i + 1;
		i++;
	}

	/* unclosed quote, will be detected later */
	return i;
}
139
/*
 * Measure an unquoted word: count the characters at the beginning of str
 * up to (not including) the first blank character or the end of the
 * string. Quote characters are not treated specially here.
 *
 * Returns the length of the word, which is 0 when str is empty or starts
 * with a blank.
 */
static size_t eat_str(const char *str)
{
	size_t i = 0;

	/* eat chars until a blank or the end of the string; the cast is
	 * required because giving a negative char value to a <ctype.h>
	 * function is undefined behavior */
	while (str[i] != '\0' && !isblank((unsigned char)str[i]))
		i++;

	return i;
}
150
/*
 * Split str into tokens, shell-like.
 *
 * Tokens are separated by blanks. A token is made of one or more
 * adjacent pieces, each piece being either a quoted section (single or
 * double quotes, unquoted with unquote_str()) or a run of non-blank
 * characters; adjacent pieces are concatenated into a single token.
 *
 * If completion is non-zero, an empty token is appended when the input
 * is empty or ends with a blank. allow_missing_quote and missing_quote
 * are passed as is to unquote_str().
 *
 * Returns a new string vector that the caller must release with
 * ec_strvec_free(), or NULL on error.
 */
static struct ec_strvec *tokenize(const char *str, int completion,
	int allow_missing_quote, char *missing_quote)
{
	struct ec_strvec *tokens;
	char *token = NULL;	/* token being assembled */
	char *piece = NULL;	/* piece to append to the token */
	size_t off = 0;
	int last_is_space = 1;

	tokens = ec_strvec();
	if (tokens == NULL)
		goto fail;

	while (str[off] != '\0') {
		size_t blanks = eat_spaces(&str[off]);
		size_t used = 0;	/* input bytes consumed by the token */

		if (blanks != 0)
			last_is_space = 1;
		off += blanks;

		/* gather all the adjacent pieces of the current token */
		for (;;) {
			const char *cur = &str[off + used];
			size_t piece_len;

			if (*cur == '\0')
				break;
			last_is_space = 0;

			if (*cur == '"' || *cur == '\'') {
				piece_len = eat_quoted_str(cur);
				piece = unquote_str(cur, piece_len,
					allow_missing_quote, missing_quote);
			} else {
				piece_len = eat_str(cur);
				if (piece_len == 0)
					break; /* blank: end of the token */
				piece = ec_strndup(cur, piece_len);
			}
			if (piece == NULL)
				goto fail;

			used += piece_len;

			if (token == NULL) {
				/* first piece: it becomes the token */
				token = piece;
			} else {
				/* a piece is never longer than the input
				 * bytes it comes from, so used + 1 bytes
				 * are enough for the concatenation */
				char *grown = ec_realloc(token, used + 1);

				if (grown == NULL)
					goto fail;
				token = grown;
				strcat(token, piece);
				ec_free(piece);
			}
			piece = NULL;
		}

		if (token != NULL) {
			if (ec_strvec_add(tokens, token) < 0)
				goto fail;
			ec_free(token);
			token = NULL;
		}

		off += used;
	}

	/* in completion mode, append an empty string in the vector if
	 * the input string ends with space */
	if (completion && last_is_space) {
		if (ec_strvec_add(tokens, "") < 0)
			goto fail;
	}

	return tokens;

 fail:
	ec_free(piece);
	ec_free(token);
	ec_strvec_free(tokens);
	return NULL;
}
237
238 static int
239 ec_node_sh_lex_parse(const struct ec_node *gen_node,
240                 struct ec_parsed *state,
241                 const struct ec_strvec *strvec)
242 {
243         struct ec_node_sh_lex *node = (struct ec_node_sh_lex *)gen_node;
244         struct ec_strvec *new_vec = NULL;
245         struct ec_parsed *child_parsed;
246         const char *str;
247         int ret;
248
249         if (ec_strvec_len(strvec) == 0) {
250                 new_vec = ec_strvec();
251         } else {
252                 str = ec_strvec_val(strvec, 0);
253                 new_vec = tokenize(str, 0, 0, NULL);
254         }
255         if (new_vec == NULL) {
256                 ret = -ENOMEM;
257                 goto fail;
258         }
259
260         ret = ec_node_parse_child(node->child, state, new_vec);
261         if (ret >= 0) {
262                 if ((unsigned)ret == ec_strvec_len(new_vec)) {
263                         ret = 1;
264                 } else {
265                         child_parsed = ec_parsed_get_last_child(state);
266                         ec_parsed_del_child(state, child_parsed);
267                         ec_parsed_free(child_parsed);
268                         ret = EC_PARSED_NOMATCH;
269                 }
270         }
271
272         ec_strvec_free(new_vec);
273         new_vec = NULL;
274
275         return ret;
276
277  fail:
278         ec_strvec_free(new_vec);
279         return ret;
280 }
281
282 static struct ec_completed *
283 ec_node_sh_lex_complete(const struct ec_node *gen_node,
284                         struct ec_parsed *state,
285                         const struct ec_strvec *strvec)
286 {
287         struct ec_node_sh_lex *node = (struct ec_node_sh_lex *)gen_node;
288         struct ec_completed *completed, *child_completed = NULL;
289         struct ec_strvec *new_vec = NULL;
290         const char *str;
291         char missing_quote;
292
293 //      printf("==================\n");
294         completed = ec_completed();
295         if (completed == NULL)
296                 return NULL;
297
298         if (ec_strvec_len(strvec) != 1)
299                 return completed;
300
301         str = ec_strvec_val(strvec, 0);
302         new_vec = tokenize(str, 1, 1, &missing_quote);
303         if (new_vec == NULL)
304                 goto fail;
305
306 //      ec_strvec_dump(new_vec, stdout);
307
308         child_completed = ec_node_complete_child(node->child, state, new_vec);
309         if (child_completed == NULL)
310                 goto fail;
311
312         ec_strvec_free(new_vec);
313         new_vec = NULL;
314         ec_completed_merge(completed, child_completed);
315
316         return completed;
317
318  fail:
319         ec_strvec_free(new_vec);
320         ec_completed_free(completed);
321         return NULL;
322 }
323
324 static void ec_node_sh_lex_free_priv(struct ec_node *gen_node)
325 {
326         struct ec_node_sh_lex *node = (struct ec_node_sh_lex *)gen_node;
327
328         ec_node_free(node->child);
329 }
330
331 static struct ec_node_type ec_node_sh_lex_type = {
332         .name = "sh_lex",
333         .parse = ec_node_sh_lex_parse,
334         .complete = ec_node_sh_lex_complete,
335         .size = sizeof(struct ec_node_sh_lex),
336         .free_priv = ec_node_sh_lex_free_priv,
337 };
338
339 EC_NODE_TYPE_REGISTER(ec_node_sh_lex_type);
340
341 struct ec_node *ec_node_sh_lex(const char *id, struct ec_node *child)
342 {
343         struct ec_node_sh_lex *node = NULL;
344
345         if (child == NULL)
346                 return NULL;
347
348         node = (struct ec_node_sh_lex *)__ec_node(&ec_node_sh_lex_type, id);
349         if (node == NULL) {
350                 ec_node_free(child);
351                 return NULL;
352         }
353
354         node->child = child;
355
356         return &node->gen;
357 }
358
359 /* LCOV_EXCL_START */
360 static int ec_node_sh_lex_testcase(void)
361 {
362         struct ec_node *node;
363         int ret = 0;
364
365         node = ec_node_sh_lex(NULL,
366                 EC_NODE_SEQ(NULL,
367                         ec_node_str(NULL, "foo"),
368                         ec_node_option(NULL,
369                                 ec_node_str(NULL, "toto")
370                         ),
371                         ec_node_str(NULL, "bar")
372                 )
373         );
374         if (node == NULL) {
375                 ec_log(EC_LOG_ERR, "cannot create node\n");
376                 return -1;
377         }
378         ret |= EC_TEST_CHECK_PARSE(node, 1, "foo bar");
379         ret |= EC_TEST_CHECK_PARSE(node, 1, "  foo   bar");
380         ret |= EC_TEST_CHECK_PARSE(node, 1, "  'foo' \"bar\"");
381         ret |= EC_TEST_CHECK_PARSE(node, 1, "  'f'oo 'toto' bar");
382         ec_node_free(node);
383
384         /* test completion */
385         node = ec_node_sh_lex(NULL,
386                 EC_NODE_SEQ(NULL,
387                         ec_node_str(NULL, "foo"),
388                         ec_node_option(NULL,
389                                 ec_node_str(NULL, "toto")
390                         ),
391                         ec_node_str(NULL, "bar"),
392                         ec_node_str(NULL, "titi")
393                 )
394         );
395         if (node == NULL) {
396                 ec_log(EC_LOG_ERR, "cannot create node\n");
397                 return -1;
398         }
399         ret |= EC_TEST_CHECK_COMPLETE(node,
400                 "", EC_NODE_ENDLIST,
401                 "foo", EC_NODE_ENDLIST,
402                 "foo");
403         ret |= EC_TEST_CHECK_COMPLETE(node,
404                 " ", EC_NODE_ENDLIST,
405                 "foo", EC_NODE_ENDLIST,
406                 "foo");
407         ret |= EC_TEST_CHECK_COMPLETE(node,
408                 "f", EC_NODE_ENDLIST,
409                 "oo", EC_NODE_ENDLIST,
410                 "oo");
411         ret |= EC_TEST_CHECK_COMPLETE(node,
412                 "foo", EC_NODE_ENDLIST,
413                 "", EC_NODE_ENDLIST,
414                 "");
415         ret |= EC_TEST_CHECK_COMPLETE(node,
416                 "foo ", EC_NODE_ENDLIST,
417                 "bar", "toto", EC_NODE_ENDLIST,
418                 "");
419         ret |= EC_TEST_CHECK_COMPLETE(node,
420                 "foo t", EC_NODE_ENDLIST,
421                 "oto", EC_NODE_ENDLIST,
422                 "oto");
423         ret |= EC_TEST_CHECK_COMPLETE(node,
424                 "foo b", EC_NODE_ENDLIST,
425                 "ar", EC_NODE_ENDLIST,
426                 "ar");
427         ret |= EC_TEST_CHECK_COMPLETE(node,
428                 "foo bar", EC_NODE_ENDLIST,
429                 "", EC_NODE_ENDLIST,
430                 "");
431         ret |= EC_TEST_CHECK_COMPLETE(node,
432                 "foo bar ", EC_NODE_ENDLIST,
433                 "titi", EC_NODE_ENDLIST,
434                 "titi");
435         ret |= EC_TEST_CHECK_COMPLETE(node,
436                 "foo toto bar ", EC_NODE_ENDLIST,
437                 "titi", EC_NODE_ENDLIST,
438                 "titi");
439         ret |= EC_TEST_CHECK_COMPLETE(node,
440                 "x", EC_NODE_ENDLIST,
441                 EC_NODE_ENDLIST,
442                 "");
443         ret |= EC_TEST_CHECK_COMPLETE(node,
444                 "foo barx", EC_NODE_ENDLIST,
445                 EC_NODE_ENDLIST,
446                 "");
447
448         ec_node_free(node);
449         return ret;
450 }
451 /* LCOV_EXCL_STOP */
452
453 static struct ec_test ec_node_sh_lex_test = {
454         .name = "node_sh_lex",
455         .test = ec_node_sh_lex_testcase,
456 };
457
458 EC_TEST_REGISTER(ec_node_sh_lex_test);