store full token and completion in completed_item
[protos/libecoli.git] / lib / ecoli_node_sh_lex.c
1 /*
2  * Copyright (c) 2016, Olivier MATZ <zer0@droids-corp.org>
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of the University of California, Berkeley nor the
13  *       names of its contributors may be used to endorse or promote products
14  *       derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27
28 #define _GNU_SOURCE /* for asprintf */
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33 #include <stdarg.h>
34 #include <ctype.h>
35 #include <errno.h>
36
37 #include <ecoli_malloc.h>
38 #include <ecoli_log.h>
39 #include <ecoli_test.h>
40 #include <ecoli_strvec.h>
41 #include <ecoli_node.h>
42 #include <ecoli_parsed.h>
43 #include <ecoli_completed.h>
44 #include <ecoli_node_seq.h>
45 #include <ecoli_node_str.h>
46 #include <ecoli_node_option.h>
47 #include <ecoli_node_sh_lex.h>
48
49 EC_LOG_TYPE_REGISTER(node_sh_lex);
50
51 struct ec_node_sh_lex {
52         struct ec_node gen;
53         struct ec_node *child;
54 };
55
/*
 * Return the number of consecutive blank characters (as defined by
 * isblank()) found at the beginning of str. The scan stops at the first
 * non-blank character, which includes the terminating NUL.
 */
static size_t eat_spaces(const char *str)
{
        size_t i = 0;

        /* the <ctype.h> functions require a value representable as an
         * unsigned char: a negative plain char would be undefined
         * behavior */
        while (isblank((unsigned char)str[i]))
                i++;

        return i;
}
66
/*
 * Allocate a new string which is a copy of a quoted token with its
 * quotes removed.
 *
 * str points to the opening quote (str[0] is taken as the quote
 * character) and n is the length of the token, quotes included. Inside
 * the token, a backslash followed by the quote character or by another
 * backslash is an escape sequence that yields this second character;
 * any other character is copied as is. The copy stops at the first
 * unescaped quote character.
 *
 * If no closing quote is found, *missing_quote (when not NULL) is set
 * to the quote character, and the function fails with EINVAL unless
 * allow_missing_quote is non-zero.
 *
 * Return the unquoted string (allocated with ec_malloc(), to be
 * released with ec_free()), or NULL with errno set to ENOMEM, EMSGSIZE
 * or EINVAL on error.
 */
static char *unquote_str(const char *str, size_t n, int allow_missing_quote,
        char *missing_quote)
{
        const char quote = str[0];
        /* s indexes the source (the opening quote is skipped), d the
         * destination; both are size_t like the bound they are compared
         * to */
        size_t s = 1, d = 0;
        char *dst;
        int closed = 0;

        /* removing the opening quote leaves room for the final '\0' */
        dst = ec_malloc(n);
        if (dst == NULL) {
                errno = ENOMEM;
                return NULL;
        }

        /* copy string and remove quotes */
        while (s < n && d < n && str[s] != '\0') {
                /* escaped quote or escaped backslash: keep the second
                 * character only */
                if (str[s] == '\\' &&
                                (str[s + 1] == quote || str[s + 1] == '\\')) {
                        dst[d++] = str[s + 1];
                        s += 2;
                        continue;
                }
                /* unescaped quote: end of the token */
                if (str[s] == quote) {
                        closed = 1;
                        break;
                }
                dst[d++] = str[s++];
        }

        /* no room left for the terminating '\0' (only possible if the
         * caller passes n == 0) */
        if (d >= n) {
                ec_free(dst);
                errno = EMSGSIZE;
                return NULL;
        }

        /* quote not closed */
        if (!closed) {
                if (missing_quote != NULL)
                        *missing_quote = quote;
                if (!allow_missing_quote) {
                        ec_free(dst);
                        errno = EINVAL;
                        return NULL;
                }
        }
        dst[d] = '\0';

        return dst;
}
127
/*
 * Return the length of the quoted token starting at str, where str[0]
 * is the opening quote character. The returned length includes both
 * quotes.
 *
 * A backslash followed by the quote character or by another backslash
 * is an escape sequence and is skipped as a whole, so that a token such
 * as "a\\" (ending with an escaped backslash) is closed by its last
 * quote. If the closing quote is missing, the length up to the end of
 * the string is returned: the unclosed quote is detected later, when
 * the token is unquoted.
 */
static size_t eat_quoted_str(const char *str)
{
        const char quote = str[0];
        size_t i = 1;

        /* empty input: there is no opening quote to skip */
        if (quote == '\0')
                return 0;

        while (str[i] != '\0') {
                if (str[i] == '\\' &&
                                (str[i + 1] == quote || str[i + 1] == '\\')) {
                        i += 2;
                        continue;
                }
                if (str[i] == quote)
                        return i + 1;
                i++;
        }

        /* unclosed quote, will be detected later */
        return i;
}
142
/*
 * Return the length of the unquoted word starting at str: the number of
 * characters found before the first blank, single quote, double quote
 * or end of string.
 */
static size_t eat_str(const char *str)
{
        size_t i;

        for (i = 0; str[i] != '\0'; i++) {
                /* cast needed: passing a negative plain char to a
                 * <ctype.h> function is undefined behavior */
                if (isblank((unsigned char)str[i]) ||
                                str[i] == '"' || str[i] == '\'')
                        break;
        }

        return i;
}
154
/*
 * Split str into a vector of tokens, the way a shell would do it.
 *
 * Tokens are separated by blanks. A token is the concatenation of
 * adjacent unquoted words and quoted strings (ex: 'f'oo gives "foo"),
 * the quotes being removed.
 *
 * If completion is non-zero, an empty token is appended when the input
 * is empty or ends with a blank.
 *
 * allow_missing_quote and missing_quote are passed to unquote_str():
 * when not NULL, *missing_quote is always written, and is '\0' unless
 * the last quoted string is not closed, in which case it contains the
 * quote character.
 *
 * Return the new vector (to be released with ec_strvec_free()), or NULL
 * on error. In this case, the errno value set by the failing call is
 * preserved across the cleanup.
 */
static struct ec_strvec *tokenize(const char *str, int completion,
        int allow_missing_quote, char *missing_quote)
{
        struct ec_strvec *strvec = NULL;
        size_t off = 0, len, suboff, sublen;
        char *word = NULL, *concat = NULL, *tmp;
        int last_is_space = 1;
        int saved_errno;

        /* always give a defined value to the caller, including when the
         * input is empty and the loop below is never entered */
        if (missing_quote != NULL)
                *missing_quote = '\0';

        strvec = ec_strvec();
        if (strvec == NULL)
                goto fail;

        while (str[off] != '\0') {
                if (missing_quote != NULL)
                        *missing_quote = '\0';
                len = eat_spaces(&str[off]);
                if (len > 0)
                        last_is_space = 1;
                off += len;

                /* len is now the number of input chars consumed by the
                 * current token */
                len = 0;
                suboff = off;
                while (str[suboff] != '\0') {
                        if (missing_quote != NULL)
                                *missing_quote = '\0';
                        last_is_space = 0;
                        if (str[suboff] == '"' || str[suboff] == '\'') {
                                sublen = eat_quoted_str(&str[suboff]);
                                word = unquote_str(&str[suboff], sublen,
                                        allow_missing_quote, missing_quote);
                        } else {
                                sublen = eat_str(&str[suboff]);
                                /* a blank: end of the current token */
                                if (sublen == 0)
                                        break;
                                word = ec_strndup(&str[suboff], sublen);
                        }

                        if (word == NULL)
                                goto fail;

                        len += sublen;
                        suboff += sublen;

                        if (concat == NULL) {
                                concat = word;
                                word = NULL;
                        } else {
                                /* len + 1 is always enough: a word is
                                 * never longer than the input it comes
                                 * from */
                                tmp = ec_realloc(concat, len + 1);
                                if (tmp == NULL)
                                        goto fail;
                                concat = tmp;
                                strcat(concat, word);
                                ec_free(word);
                                word = NULL;
                        }
                }

                if (concat != NULL) {
                        if (ec_strvec_add(strvec, concat) < 0)
                                goto fail;
                        ec_free(concat);
                        concat = NULL;
                }

                off += len;
        }

        /* in completion mode, append an empty string in the vector if
         * the input string ends with space */
        if (completion && last_is_space) {
                if (ec_strvec_add(strvec, "") < 0)
                        goto fail;
        }

        return strvec;

 fail:
        /* the cleanup functions must not hide the cause of the error */
        saved_errno = errno;
        ec_free(word);
        ec_free(concat);
        ec_strvec_free(strvec);
        errno = saved_errno;
        return NULL;
}
237
238 static int
239 ec_node_sh_lex_parse(const struct ec_node *gen_node,
240                 struct ec_parsed *state,
241                 const struct ec_strvec *strvec)
242 {
243         struct ec_node_sh_lex *node = (struct ec_node_sh_lex *)gen_node;
244         struct ec_strvec *new_vec = NULL;
245         struct ec_parsed *child_parsed;
246         const char *str;
247         int ret;
248
249         if (ec_strvec_len(strvec) == 0) {
250                 new_vec = ec_strvec();
251         } else {
252                 str = ec_strvec_val(strvec, 0);
253                 new_vec = tokenize(str, 0, 0, NULL);
254         }
255         if (new_vec == NULL) {
256                 ret = -ENOMEM;
257                 goto fail;
258         }
259
260         ret = ec_node_parse_child(node->child, state, new_vec);
261         if (ret < 0)
262                 goto fail;
263
264         if ((unsigned)ret == ec_strvec_len(new_vec)) {
265                 ret = 1;
266         } else if (ret != EC_PARSED_NOMATCH) {
267                 child_parsed = ec_parsed_get_last_child(state);
268                 ec_parsed_del_child(state, child_parsed);
269                 ec_parsed_free(child_parsed);
270                 ret = EC_PARSED_NOMATCH;
271         }
272
273         ec_strvec_free(new_vec);
274         new_vec = NULL;
275
276         return ret;
277
278  fail:
279         ec_strvec_free(new_vec);
280         return ret;
281 }
282
283 static int
284 ec_node_sh_lex_complete(const struct ec_node *gen_node,
285                         struct ec_completed *completed,
286                         const struct ec_strvec *strvec)
287 {
288         struct ec_node_sh_lex *node = (struct ec_node_sh_lex *)gen_node;
289         struct ec_completed *tmp_completed = NULL;
290         struct ec_strvec *new_vec = NULL;
291         struct ec_completed_iter *iter = NULL;
292         struct ec_completed_item *item = NULL;
293         char *new_str = NULL;
294         const char *str;
295         char missing_quote;
296         int ret;
297
298         if (ec_strvec_len(strvec) != 1)
299                 return 0;
300
301         str = ec_strvec_val(strvec, 0);
302         new_vec = tokenize(str, 1, 1, &missing_quote);
303         if (new_vec == NULL)
304                 goto fail;
305
306         /* we will store the completions in a temporary struct, because
307          * we want to update them (ex: add missing quotes) */
308         tmp_completed = ec_completed(ec_completed_get_state(completed));
309         if (tmp_completed == NULL)
310                 goto fail;
311
312         ret = ec_node_complete_child(node->child, tmp_completed, new_vec);
313         if (ret < 0)
314                 goto fail;
315
316         /* add missing quote for full completions  */
317         if (missing_quote != '\0') {
318                 iter = ec_completed_iter(tmp_completed, EC_COMP_FULL);
319                 if (iter == NULL)
320                         goto fail;
321                 while ((item = ec_completed_iter_next(iter)) != NULL) {
322                         str = ec_completed_item_get_str(item);
323                         if (asprintf(&new_str, "%c%s%c", missing_quote, str,
324                                         missing_quote) < 0) {
325                                 new_str = NULL;
326                                 goto fail;
327                         }
328                         if (ec_completed_item_set_str(item, new_str) < 0)
329                                 goto fail;
330                         free(new_str);
331                         new_str = NULL;
332
333                         str = ec_completed_item_get_completion(item);
334                         if (asprintf(&new_str, "%s%c", str,
335                                         missing_quote) < 0) {
336                                 new_str = NULL;
337                                 goto fail;
338                         }
339                         if (ec_completed_item_set_completion(item, new_str) < 0)
340                                 goto fail;
341                         free(new_str);
342                         new_str = NULL;
343                 }
344         }
345
346         ec_completed_iter_free(iter);
347         ec_strvec_free(new_vec);
348
349         ec_completed_merge(completed, tmp_completed);
350
351         return 0;
352
353  fail:
354         ec_completed_free(tmp_completed);
355         ec_completed_iter_free(iter);
356         ec_strvec_free(new_vec);
357         free(new_str);
358
359         return -1;
360 }
361
362 static void ec_node_sh_lex_free_priv(struct ec_node *gen_node)
363 {
364         struct ec_node_sh_lex *node = (struct ec_node_sh_lex *)gen_node;
365
366         ec_node_free(node->child);
367 }
368
369 static struct ec_node_type ec_node_sh_lex_type = {
370         .name = "sh_lex",
371         .parse = ec_node_sh_lex_parse,
372         .complete = ec_node_sh_lex_complete,
373         .size = sizeof(struct ec_node_sh_lex),
374         .free_priv = ec_node_sh_lex_free_priv,
375 };
376
377 EC_NODE_TYPE_REGISTER(ec_node_sh_lex_type);
378
379 struct ec_node *ec_node_sh_lex(const char *id, struct ec_node *child)
380 {
381         struct ec_node_sh_lex *node = NULL;
382
383         if (child == NULL)
384                 return NULL;
385
386         node = (struct ec_node_sh_lex *)__ec_node(&ec_node_sh_lex_type, id);
387         if (node == NULL) {
388                 ec_node_free(child);
389                 return NULL;
390         }
391
392         node->child = child;
393
394         return &node->gen;
395 }
396
397 /* LCOV_EXCL_START */
398 static int ec_node_sh_lex_testcase(void)
399 {
400         struct ec_node *node;
401         int ret = 0;
402
403         node = ec_node_sh_lex(NULL,
404                 EC_NODE_SEQ(NULL,
405                         ec_node_str(NULL, "foo"),
406                         ec_node_option(NULL,
407                                 ec_node_str(NULL, "toto")
408                         ),
409                         ec_node_str(NULL, "bar")
410                 )
411         );
412         if (node == NULL) {
413                 EC_LOG(EC_LOG_ERR, "cannot create node\n");
414                 return -1;
415         }
416         ret |= EC_TEST_CHECK_PARSE(node, 1, "foo bar");
417         ret |= EC_TEST_CHECK_PARSE(node, 1, "  foo   bar");
418         ret |= EC_TEST_CHECK_PARSE(node, 1, "  'foo' \"bar\"");
419         ret |= EC_TEST_CHECK_PARSE(node, 1, "  'f'oo 'toto' bar");
420         ret |= EC_TEST_CHECK_PARSE(node, -1, "  foo toto bar'");
421         ec_node_free(node);
422
423         /* test completion */
424         node = ec_node_sh_lex(NULL,
425                 EC_NODE_SEQ(NULL,
426                         ec_node_str(NULL, "foo"),
427                         ec_node_option(NULL,
428                                 ec_node_str(NULL, "toto")
429                         ),
430                         ec_node_str(NULL, "bar"),
431                         ec_node_str(NULL, "titi")
432                 )
433         );
434         if (node == NULL) {
435                 EC_LOG(EC_LOG_ERR, "cannot create node\n");
436                 return -1;
437         }
438         ret |= EC_TEST_CHECK_COMPLETE(node,
439                 "", EC_NODE_ENDLIST,
440                 "foo", EC_NODE_ENDLIST);
441         ret |= EC_TEST_CHECK_COMPLETE(node,
442                 " ", EC_NODE_ENDLIST,
443                 "foo", EC_NODE_ENDLIST);
444         ret |= EC_TEST_CHECK_COMPLETE(node,
445                 "f", EC_NODE_ENDLIST,
446                 "foo", EC_NODE_ENDLIST);
447         ret |= EC_TEST_CHECK_COMPLETE(node,
448                 "foo", EC_NODE_ENDLIST,
449                 "foo", EC_NODE_ENDLIST);
450         ret |= EC_TEST_CHECK_COMPLETE(node,
451                 "foo ", EC_NODE_ENDLIST,
452                 "bar", "toto", EC_NODE_ENDLIST);
453         ret |= EC_TEST_CHECK_COMPLETE(node,
454                 "foo t", EC_NODE_ENDLIST,
455                 "toto", EC_NODE_ENDLIST);
456         ret |= EC_TEST_CHECK_COMPLETE(node,
457                 "foo b", EC_NODE_ENDLIST,
458                 "bar", EC_NODE_ENDLIST);
459         ret |= EC_TEST_CHECK_COMPLETE(node,
460                 "foo bar", EC_NODE_ENDLIST,
461                 "bar", EC_NODE_ENDLIST);
462         ret |= EC_TEST_CHECK_COMPLETE(node,
463                 "foo bar ", EC_NODE_ENDLIST,
464                 "titi", EC_NODE_ENDLIST);
465         ret |= EC_TEST_CHECK_COMPLETE(node,
466                 "foo toto bar ", EC_NODE_ENDLIST,
467                 "titi", EC_NODE_ENDLIST);
468         ret |= EC_TEST_CHECK_COMPLETE(node,
469                 "x", EC_NODE_ENDLIST,
470                 EC_NODE_ENDLIST);
471         ret |= EC_TEST_CHECK_COMPLETE(node,
472                 "foo barx", EC_NODE_ENDLIST,
473                 EC_NODE_ENDLIST);
474         ret |= EC_TEST_CHECK_COMPLETE(node,
475                 "foo 'b", EC_NODE_ENDLIST,
476                 "'bar'", EC_NODE_ENDLIST);
477
478         ec_node_free(node);
479         return ret;
480 }
481 /* LCOV_EXCL_STOP */
482
483 static struct ec_test ec_node_sh_lex_test = {
484         .name = "node_sh_lex",
485         .test = ec_node_sh_lex_testcase,
486 };
487
488 EC_TEST_REGISTER(ec_node_sh_lex_test);