save
[protos/libecoli.git] / lib / ecoli_tk_shlex.c
1 /*
2  * Copyright (c) 2016, Olivier MATZ <zer0@droids-corp.org>
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of the University of California, Berkeley nor the
13  *       names of its contributors may be used to endorse or promote products
14  *       derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <assert.h>
32 #include <stdarg.h>
33 #include <ctype.h>
34 #include <errno.h>
35
36 #include <ecoli_malloc.h>
37 #include <ecoli_log.h>
38 #include <ecoli_test.h>
39 #include <ecoli_strvec.h>
40 #include <ecoli_tk.h>
41 #include <ecoli_tk_seq.h>
42 #include <ecoli_tk_str.h>
43 #include <ecoli_tk_option.h>
44 #include <ecoli_tk_shlex.h>
45
46 struct ec_tk_shlex {
47         struct ec_tk gen;
48         struct ec_tk *child;
49 };
50
/*
 * Return the number of blank characters (as reported by isblank())
 * found at the beginning of str. The scan stops at the first non-blank
 * character or at the end of the string.
 */
static size_t eat_spaces(const char *str)
{
	size_t i = 0;

	/* the cast is required: giving a negative char value to isblank()
	 * is undefined behavior */
	while (isblank((unsigned char)str[i]))
		i++;

	return i;
}
61
/*
 * Allocate a new string which is a copy of the quoted token located at
 * the beginning of str, with the enclosing quotes removed. Inside the
 * token, a backslash followed by the quote char is replaced by the quote
 * char, and a backslash followed by a backslash is replaced by a single
 * backslash.
 *
 * str[0] is used as the quote character, and n is the length of the
 * token in str, quotes included. The copy stops at the closing quote,
 * after n characters or at the end of the string, whichever comes first.
 *
 * If the quote is not closed and missing_quote is not NULL,
 * *missing_quote is set to the quote char (it is left untouched
 * otherwise). An unclosed token is an error unless allow_missing_quote
 * is non-zero.
 *
 * Return the new string, to be released with ec_free(), or NULL on
 * error. In this case errno is set to ENOMEM (allocation failure),
 * EMSGSIZE (result does not fit in n bytes) or EINVAL (unclosed quote
 * while allow_missing_quote is zero).
 */
static char *unquote_str(const char *str, size_t n, int allow_missing_quote,
	char *missing_quote)
{
	/* same type as n, so that long tokens cannot wrap the indexes */
	size_t s = 1, d = 0;
	char quote = str[0];
	char *dst;
	int closed = 0;

	dst = ec_malloc(n);
	if (dst == NULL) {
		errno = ENOMEM;
		return NULL;
	}

	/* copy token and remove quotes; s starts after the opening quote */
	while (s < n && d < n && str[s] != '\0') {
		/* escaped quote or escaped backslash: keep the second char */
		if (str[s] == '\\' &&
				(str[s + 1] == quote || str[s + 1] == '\\')) {
			dst[d++] = str[s + 1];
			s += 2;
			continue;
		}
		if (str[s] == quote) {
			s++;
			closed = 1;
			break;
		}
		dst[d++] = str[s++];
	}

	/* not enough room in dst buffer (should not happen) */
	if (d >= n) {
		ec_free(dst);
		errno = EMSGSIZE;
		return NULL;
	}

	/* quote not closed */
	if (closed == 0) {
		if (missing_quote != NULL)
			*missing_quote = quote;
		if (allow_missing_quote == 0) {
			ec_free(dst);
			errno = EINVAL;
			return NULL;
		}
	}
	dst[d] = '\0';

	return dst;
}
122
/*
 * Return the length of the quoted token located at the beginning of
 * str, where str[0] is the opening quote. The returned length includes
 * the opening and the closing quotes.
 *
 * Inside the token, a backslash followed by the quote char or by another
 * backslash is an escape sequence. It is skipped as a whole, so that an
 * escaped quote does not end the token, and a quote following an escaped
 * backslash does.
 *
 * If the quote is not closed, the length of the whole string is
 * returned; the caller is expected to detect the missing quote later.
 * Return 0 if str is empty.
 */
static size_t eat_quoted_str(const char *str)
{
	const char quote = str[0];
	size_t i;

	if (quote == '\0')
		return 0;

	/* start after the opening quote */
	i = 1;
	while (str[i] != '\0') {
		if (str[i] == '\\' &&
				(str[i + 1] == quote || str[i + 1] == '\\')) {
			i += 2;
			continue;
		}
		if (str[i] == quote)
			return i + 1;
		i++;
	}

	/* unclosed quote, will be detected later */
	return i;
}
137
/*
 * Return the number of characters found at the beginning of str before
 * the first blank character (as reported by isblank()) or the end of the
 * string. Quote characters are not handled specially here.
 */
static size_t eat_str(const char *str)
{
	size_t i = 0;

	/* advance up to the next blank or the end of the string; the cast
	 * is required because giving a negative char value to isblank() is
	 * undefined behavior */
	while (str[i] != '\0' && !isblank((unsigned char)str[i]))
		i++;

	return i;
}
148
/*
 * Split str into tokens and return them in a newly allocated string
 * vector.
 *
 * Tokens are separated by blanks. A token is built from adjacent pieces,
 * each piece being either a quoted string (starting with " or ') or a
 * run of non-blank characters; quoted pieces are unquoted, then all the
 * pieces are concatenated.
 *
 * If completion is non-zero, an empty token is appended when the string
 * is empty or ends with a blank. allow_missing_quote and missing_quote
 * are given as is to unquote_str().
 *
 * Return NULL on error.
 */
static struct ec_strvec *tokenize(const char *str, int completion,
	int allow_missing_quote, char *missing_quote)
{
	struct ec_strvec *tokens = NULL;
	char *piece = NULL, *token = NULL, *grown;
	size_t pos = 0, cur, consumed, piece_len;
	int ends_with_blank = 1;

	tokens = ec_strvec_new();
	if (tokens == NULL)
		goto fail;

	while (str[pos] != '\0') {
		/* skip the blanks located before the token */
		consumed = eat_spaces(&str[pos]);
		if (consumed > 0)
			ends_with_blank = 1;
		pos += consumed;

		/* gather the adjacent pieces of the token */
		consumed = 0;
		for (cur = pos; str[cur] != '\0'; cur += piece_len) {
			ends_with_blank = 0;

			if (str[cur] == '"' || str[cur] == '\'') {
				piece_len = eat_quoted_str(&str[cur]);
				piece = unquote_str(&str[cur], piece_len,
					allow_missing_quote, missing_quote);
			} else {
				piece_len = eat_str(&str[cur]);
				/* a blank was reached: end of the token */
				if (piece_len == 0)
					break;
				piece = ec_strndup(&str[cur], piece_len);
			}
			if (piece == NULL)
				goto fail;

			consumed += piece_len;

			/* the first piece becomes the token itself */
			if (token == NULL) {
				token = piece;
				piece = NULL;
				continue;
			}

			/* consumed counts source characters, quotes
			 * included, so consumed + 1 bytes can hold the
			 * concatenation of the pieces */
			grown = ec_realloc(token, consumed + 1);
			if (grown == NULL)
				goto fail;
			token = grown;
			strcat(token, piece);
			ec_free(piece);
			piece = NULL;
		}

		if (token != NULL) {
			if (ec_strvec_add(tokens, token) < 0)
				goto fail;
			ec_free(token);
			token = NULL;
		}

		pos += consumed;
	}

	/* in completion mode, append an empty token if the string ends
	 * with space */
	if (completion && ends_with_blank) {
		if (ec_strvec_add(tokens, "") < 0)
			goto fail;
	}

	return tokens;

 fail:
	ec_free(piece);
	ec_free(token);
	ec_strvec_free(tokens);
	return NULL;
}
235
236 static struct ec_parsed_tk *ec_tk_shlex_parse(const struct ec_tk *gen_tk,
237         const struct ec_strvec *strvec)
238 {
239         struct ec_tk_shlex *tk = (struct ec_tk_shlex *)gen_tk;
240         struct ec_strvec *new_vec = NULL, *match_strvec;
241         struct ec_parsed_tk *parsed_tk = NULL, *child_parsed_tk;
242         const char *str;
243
244         parsed_tk = ec_parsed_tk_new();
245         if (parsed_tk == NULL)
246                 return NULL;
247
248         if (ec_strvec_len(strvec) == 0)
249                 return parsed_tk;
250
251         str = ec_strvec_val(strvec, 0);
252         new_vec = tokenize(str, 0, 0, NULL);
253         if (new_vec == NULL)
254                 goto fail;
255
256         child_parsed_tk = ec_tk_parse_tokens(tk->child, new_vec);
257         if (child_parsed_tk == NULL)
258                 goto fail;
259
260         if (!ec_parsed_tk_matches(child_parsed_tk) ||
261                         ec_parsed_tk_len(child_parsed_tk) !=
262                                 ec_strvec_len(new_vec)) {
263                 ec_strvec_free(new_vec);
264                 ec_parsed_tk_free(child_parsed_tk);
265                 return parsed_tk;
266         }
267         ec_strvec_free(new_vec);
268         new_vec = NULL;
269
270         ec_parsed_tk_add_child(parsed_tk, child_parsed_tk);
271         match_strvec = ec_strvec_ndup(strvec, 0, 1);
272         if (match_strvec == NULL)
273                 goto fail;
274         ec_parsed_tk_set_match(parsed_tk, gen_tk, match_strvec);
275
276         return parsed_tk;
277
278  fail:
279         ec_strvec_free(new_vec);
280         ec_parsed_tk_free(parsed_tk);
281
282         return NULL;
283 }
284
285 static struct ec_completed_tk *ec_tk_shlex_complete(const struct ec_tk *gen_tk,
286         const struct ec_strvec *strvec)
287 {
288         struct ec_tk_shlex *tk = (struct ec_tk_shlex *)gen_tk;
289         struct ec_completed_tk *completed_tk, *child_completed_tk = NULL;
290         struct ec_strvec *new_vec = NULL;
291         const char *str;
292         char missing_quote;
293
294 //      printf("==================\n");
295         completed_tk = ec_completed_tk_new();
296         if (completed_tk == NULL)
297                 return NULL;
298
299         if (ec_strvec_len(strvec) != 1)
300                 return completed_tk;
301
302         str = ec_strvec_val(strvec, 0);
303         new_vec = tokenize(str, 1, 1, &missing_quote);
304         if (new_vec == NULL)
305                 goto fail;
306
307 //      ec_strvec_dump(new_vec, stdout);
308
309         child_completed_tk = ec_tk_complete_tokens(tk->child, new_vec);
310         if (child_completed_tk == NULL)
311                 goto fail;
312
313         ec_strvec_free(new_vec);
314         new_vec = NULL;
315         ec_completed_tk_merge(completed_tk, child_completed_tk);
316
317         return completed_tk;
318
319
320 #if 0
321         for (i = 0, t = &tokens[0]; i < tk->len; i++, t++) {
322                 if (*(t + 1) != NULL) {
323                         child_parsed_tk = ec_tk_parse(tk->table[i], *t);
324                         if (child_parsed_tk == NULL)
325                                 goto fail;
326
327                         if (strlen(child_parsed_tk->str) == 0)
328                                 t--;
329                         else if (strlen(child_parsed_tk->str) != strlen(*t)) {
330                                 ec_parsed_tk_free(child_parsed_tk);
331                                 goto fail;
332                         }
333
334                         ec_parsed_tk_free(child_parsed_tk);
335                 } else {
336                         child_completed_tk = ec_tk_complete(tk->table[i], *t);
337                         if (child_completed_tk == NULL) {
338                                 ec_completed_tk_free(completed_tk);
339                                 return NULL;
340                         }
341                         ec_completed_tk_merge(completed_tk, child_completed_tk);
342
343                         child_parsed_tk = ec_tk_parse(tk->table[i], "");
344                         if (child_parsed_tk == NULL)
345                                 break;
346                         ec_parsed_tk_free(child_parsed_tk);
347                         t--;
348                 }
349         }
350
351         if (tokens != NULL) {
352                 for (t = &tokens[0]; *t != NULL; t++)
353                         ec_free(*t);
354                 ec_free(tokens);
355                 tokens = NULL;
356         }
357
358         ec_completed_tk_dump(stdout, completed_tk);
359 #endif
360
361  fail:
362         ec_strvec_free(new_vec);
363         ec_completed_tk_free(completed_tk);
364         return NULL;
365 }
366
367 static void ec_tk_shlex_free_priv(struct ec_tk *gen_tk)
368 {
369         struct ec_tk_shlex *tk = (struct ec_tk_shlex *)gen_tk;
370
371         ec_tk_free(tk->child);
372 }
373
374 static struct ec_tk_ops ec_tk_shlex_ops = {
375         .typename = "shlex",
376         .parse = ec_tk_shlex_parse,
377         .complete = ec_tk_shlex_complete,
378         .free_priv = ec_tk_shlex_free_priv,
379 };
380
381 struct ec_tk *ec_tk_shlex_new(const char *id, struct ec_tk *child)
382 {
383         struct ec_tk_shlex *tk = NULL;
384
385         if (child == NULL)
386                 return NULL;
387
388         tk = (struct ec_tk_shlex *)ec_tk_new(id, &ec_tk_shlex_ops,
389                 sizeof(*tk));
390         if (tk == NULL) {
391                 ec_tk_free(child);
392                 return NULL;
393         }
394
395         tk->child = child;
396
397         return &tk->gen;
398 }
399
400 static int ec_tk_shlex_testcase(void)
401 {
402         struct ec_tk *tk;
403         int ret = 0;
404
405         tk = ec_tk_shlex_new(NULL,
406                 EC_TK_SEQ(NULL,
407                         ec_tk_str(NULL, "foo"),
408                         ec_tk_option_new(NULL,
409                                 ec_tk_str(NULL, "toto")
410                         ),
411                         ec_tk_str(NULL, "bar")
412                 )
413         );
414         if (tk == NULL) {
415                 ec_log(EC_LOG_ERR, "cannot create tk\n");
416                 return -1;
417         }
418         ret |= EC_TEST_CHECK_TK_PARSE(tk, 1, "foo bar");
419         ret |= EC_TEST_CHECK_TK_PARSE(tk, 1, "  foo   bar");
420         ret |= EC_TEST_CHECK_TK_PARSE(tk, 1, "  'foo' \"bar\"");
421         ret |= EC_TEST_CHECK_TK_PARSE(tk, 1, "  'f'oo 'toto' bar");
422         ec_tk_free(tk);
423
424         /* test completion */
425         tk = ec_tk_shlex_new(NULL,
426                 EC_TK_SEQ(NULL,
427                         ec_tk_str(NULL, "foo"),
428                         ec_tk_option_new(NULL,
429                                 ec_tk_str(NULL, "toto")
430                         ),
431                         ec_tk_str(NULL, "bar"),
432                         ec_tk_str(NULL, "titi")
433                 )
434         );
435         if (tk == NULL) {
436                 ec_log(EC_LOG_ERR, "cannot create tk\n");
437                 return -1;
438         }
439         ret |= EC_TEST_CHECK_TK_COMPLETE(tk,
440                 "", EC_TK_ENDLIST,
441                 "foo", EC_TK_ENDLIST,
442                 "foo");
443         ret |= EC_TEST_CHECK_TK_COMPLETE(tk,
444                 " ", EC_TK_ENDLIST,
445                 "foo", EC_TK_ENDLIST,
446                 "foo");
447         ret |= EC_TEST_CHECK_TK_COMPLETE(tk,
448                 "f", EC_TK_ENDLIST,
449                 "oo", EC_TK_ENDLIST,
450                 "oo");
451         ret |= EC_TEST_CHECK_TK_COMPLETE(tk,
452                 "foo", EC_TK_ENDLIST,
453                 "", EC_TK_ENDLIST,
454                 "");
455         ret |= EC_TEST_CHECK_TK_COMPLETE(tk,
456                 "foo ", EC_TK_ENDLIST,
457                 "bar", "toto", EC_TK_ENDLIST,
458                 "");
459         ret |= EC_TEST_CHECK_TK_COMPLETE(tk,
460                 "foo t", EC_TK_ENDLIST,
461                 "oto", EC_TK_ENDLIST,
462                 "oto");
463         ret |= EC_TEST_CHECK_TK_COMPLETE(tk,
464                 "foo b", EC_TK_ENDLIST,
465                 "ar", EC_TK_ENDLIST,
466                 "ar");
467         ret |= EC_TEST_CHECK_TK_COMPLETE(tk,
468                 "foo bar", EC_TK_ENDLIST,
469                 "", EC_TK_ENDLIST,
470                 "");
471         ret |= EC_TEST_CHECK_TK_COMPLETE(tk,
472                 "foo bar ", EC_TK_ENDLIST,
473                 "titi", EC_TK_ENDLIST,
474                 "titi");
475         ret |= EC_TEST_CHECK_TK_COMPLETE(tk,
476                 "foo toto bar ", EC_TK_ENDLIST,
477                 "titi", EC_TK_ENDLIST,
478                 "titi");
479         ret |= EC_TEST_CHECK_TK_COMPLETE(tk,
480                 "x", EC_TK_ENDLIST,
481                 EC_TK_ENDLIST,
482                 "");
483         ret |= EC_TEST_CHECK_TK_COMPLETE(tk,
484                 "foo barx", EC_TK_ENDLIST,
485                 EC_TK_ENDLIST,
486                 "");
487
488         ec_tk_free(tk);
489         return ret;
490 }
491
492 static struct ec_test ec_tk_shlex_test = {
493         .name = "tk_shlex",
494         .test = ec_tk_shlex_testcase,
495 };
496
497 EC_REGISTER_TEST(ec_tk_shlex_test);