1 /*
2  *  TCC - Tiny C Compiler
3  *
4  *  Copyright (c) 2001-2004 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  */
20 
21 #define USING_GLOBALS
22 #include "tcc.h"
23 
24 /********************************************************/
25 /* global variables */
26 
ST_DATA int tok_flags;
/* parser option bits; tested in this file against PARSE_FLAG_ACCEPT_STRAYS
   and PARSE_FLAG_ASM_FILE */
ST_DATA int parse_flags;

ST_DATA struct BufferedFile *file; /* input file currently being read */
ST_DATA int ch, tok; /* current input character, current token */
ST_DATA CValue tokc; /* value attached to the current token 'tok' */
ST_DATA const int *macro_ptr;
ST_DATA CString tokcstr; /* current parsed string, if any */

/* tok_ident: next token number to hand out (see tok_alloc_new);
   table_ident: all identifier symbols, indexed by (token - TOK_IDENT) */
ST_DATA int tok_ident;
ST_DATA TokenSym **table_ident;
39 
40 /* ------------------------------------------------------------------------- */
41 
/* hash buckets for identifier lookup; entries of one bucket are chained
   through TokenSym.hash_next (see tok_alloc) */
static TokenSym *hash_ident[TOK_HASH_SIZE];
static char token_buf[STRING_MAX_SIZE + 1];
/* scratch string whose data is returned by get_tok_str() */
static CString cstr_buf;
static CString macro_equal_buf;
static TokenString tokstr_buf;
/* per-character flag bits (e.g. IS_SPC), indexed by (c - CH_EOF) so that
   CH_EOF itself maps to a valid slot */
static unsigned char isidnum_table[256 - CH_EOF];
static int pp_debug_tok, pp_debug_symv;
static int pp_once;
static int pp_expr;
static int pp_counter;
static void tok_print(const char *msg, const int *str);

static struct TinyAlloc *toksym_alloc; /* allocator used for TokenSym objects */
static struct TinyAlloc *tokstr_alloc; /* allocator used for token strings */

static TokenString *macro_stack;
58 
/* All token names from tcctok.h packed into one array: each DEF(id, str)
   entry expands to its string followed by an explicit NUL, and adjacent
   string literals are concatenated by the compiler. */
static const char tcc_keywords[] =
#define DEF(id, str) str "\0"
#include "tcctok.h"
#undef DEF
;
64 
/* Table of two-character operators, stored as triples
   { first char, second char, token number } and terminated by a single 0.
   get_tok_str() scans it three bytes at a time to print such tokens.
   WARNING: the token numbers are stored as unsigned char, so every token
   listed here must fit in one byte. */
static const unsigned char tok_two_chars[] =
/* outdated -- gr
    "<=\236>=\235!=\225&&\240||\241++\244--\242==\224<<\1>>\2+=\253"
    "-=\255*=\252/=\257%=\245&=\246^=\336|=\374->\313..\250##\266";
*/{
    '<','=', TOK_LE,
    '>','=', TOK_GE,
    '!','=', TOK_NE,
    '&','&', TOK_LAND,
    '|','|', TOK_LOR,
    '+','+', TOK_INC,
    '-','-', TOK_DEC,
    '=','=', TOK_EQ,
    '<','<', TOK_SHL,
    '>','>', TOK_SAR,
    '+','=', TOK_A_ADD,
    '-','=', TOK_A_SUB,
    '*','=', TOK_A_MUL,
    '/','=', TOK_A_DIV,
    '%','=', TOK_A_MOD,
    '&','=', TOK_A_AND,
    '^','=', TOK_A_XOR,
    '|','=', TOK_A_OR,
    '-','>', TOK_ARROW,
    '.','.', TOK_TWODOTS,
    '#','#', TOK_TWOSHARPS,
    0
};
94 
/* forward declaration (the definition is not part of this section of the file) */
static void next_nomacro_spc(void);
96 
skip(int c)97 ST_FUNC void skip(int c)
98 {
99     if (tok != c)
100         tcc_error("'%c' expected (got \"%s\")", c, get_tok_str(tok, &tokc));
101     next();
102 }
103 
/* Report the error "<msg> expected" through tcc_error().
   msg: description of the construct that was expected. */
ST_FUNC void expect(const char *msg)
{
    tcc_error("%s expected", msg);
}
108 
109 /* ------------------------------------------------------------------------- */
110 /* Custom allocator for tiny objects */
111 
/* USE_TAL selects the tiny allocator below; without it the tal_* macros
   fall back to plain tcc_free/tcc_realloc and tal_new/tal_delete expand
   to nothing. */
#define USE_TAL

#ifndef USE_TAL
#define tal_free(al, p) tcc_free(p)
#define tal_realloc(al, p, size) tcc_realloc(p, size)
#define tal_new(a,b,c)
#define tal_delete(a)
#else
#if !defined(MEM_DEBUG)
/* release build: no call-site information is passed along */
#define tal_free(al, p) tal_free_impl(al, p)
/* note: takes the address of 'al', so 'al' must be an lvalue; the
   implementation may replace it with a newly created allocator */
#define tal_realloc(al, p, size) tal_realloc_impl(&al, p, size)
#define TAL_DEBUG_PARAMS
#else
/* MEM_DEBUG build: every call also records __FILE__/__LINE__ so leaks and
   double frees can be attributed to their call site */
#define TAL_DEBUG 1
//#define TAL_INFO 1 /* collect and dump allocators stats */
#define tal_free(al, p) tal_free_impl(al, p, __FILE__, __LINE__)
#define tal_realloc(al, p, size) tal_realloc_impl(&al, p, size, __FILE__, __LINE__)
#define TAL_DEBUG_PARAMS , const char *file, int line
#define TAL_DEBUG_FILE_LEN 40 /* max characters of the file name kept per chunk */
#endif

/* *_TAL_SIZE: size in bytes of an allocator's buffer;
   *_TAL_LIMIT: largest request in bytes the allocator serves itself */
#define TOKSYM_TAL_SIZE     (768 * 1024) /* allocator for tiny TokenSym in table_ident */
#define TOKSTR_TAL_SIZE     (768 * 1024) /* allocator for tiny TokenString instances */
#define CSTR_TAL_SIZE       (256 * 1024) /* allocator for tiny CString instances */
#define TOKSYM_TAL_LIMIT    256 /* prefer unique limits to distinguish allocators debug msgs */
#define TOKSTR_TAL_LIMIT    128 /* 32 * sizeof(int) */
#define CSTR_TAL_LIMIT      1024
139 
/* One bump allocator. Allocators are linked into a chain through 'next';
   requests that do not fit are passed down the chain or to the heap. */
typedef struct TinyAlloc {
    unsigned  limit;     /* largest request (bytes) served from 'buffer' */
    unsigned  size;      /* size of 'buffer' in bytes */
    uint8_t *buffer;     /* start of the backing storage */
    uint8_t *p;          /* next free byte in 'buffer' (bump pointer) */
    unsigned  nb_allocs; /* live chunks; when it drops to 0, 'p' is reset to 'buffer' */
    /* next: following allocator in the chain.
       top: set on the last allocator of the chain to the most recently
       created one (see tal_realloc_impl) */
    struct TinyAlloc *next, *top;
#ifdef TAL_INFO
    unsigned  nb_peak;   /* highest value reached by nb_allocs */
    unsigned  nb_total;  /* number of chunks handed out from 'buffer' */
    unsigned  nb_missed; /* number of requests not served from 'buffer' */
    uint8_t *peak_p;     /* highest value reached by 'p' */
#endif
} TinyAlloc;
154 
/* Header stored immediately in front of every chunk handed out from an
   allocator buffer. */
typedef struct tal_header_t {
    unsigned  size; /* payload size in bytes (request rounded up to a multiple of 4) */
#ifdef TAL_DEBUG
    int     line_num; /* negative line_num used for double free check */
    char    file_name[TAL_DEBUG_FILE_LEN + 1]; /* tail of the allocating file's name, NUL-terminated */
#endif
} tal_header_t;
162 
163 /* ------------------------------------------------------------------------- */
164 
/* Create an allocator with a buffer of 'size' bytes that serves requests
   of up to 'limit' bytes. If 'pal' is non-NULL the new allocator is also
   stored through it. Returns the new allocator. */
static TinyAlloc *tal_new(TinyAlloc **pal, unsigned limit, unsigned size)
{
    TinyAlloc *arena = tcc_mallocz(sizeof *arena);
    uint8_t *storage = tcc_malloc(size);

    arena->buffer = storage;
    arena->p = storage;
    arena->size = size;
    arena->limit = limit;
    if (pal != NULL)
        *pal = arena;
    return arena;
}
174 
/* Free a whole chain of allocators starting at 'al' (NULL is accepted).
   With TAL_INFO, usage statistics are printed for each allocator; with
   TAL_DEBUG, chunks that were never freed are reported on stderr. */
static void tal_delete(TinyAlloc *al)
{
    while (al != NULL) {
        TinyAlloc *rest = al->next;

#ifdef TAL_INFO
        fprintf(stderr, "limit=%5d, size=%5g MB, nb_peak=%6d, nb_total=%8d, nb_missed=%6d, usage=%5.1f%%\n",
                al->limit, al->size / 1024.0 / 1024.0, al->nb_peak, al->nb_total, al->nb_missed,
                (al->peak_p - al->buffer) * 100.0 / al->size);
#endif
#ifdef TAL_DEBUG
        if (al->nb_allocs > 0) {
            uint8_t *scan = al->buffer;

            fprintf(stderr, "TAL_DEBUG: memory leak %d chunk(s) (limit= %d)\n",
                    al->nb_allocs, al->limit);
            /* walk every chunk header; a positive line number marks a
               chunk that is still allocated */
            while (scan < al->p) {
                tal_header_t *hdr = (tal_header_t *)scan;
                if (hdr->line_num > 0)
                    fprintf(stderr, "%s:%d: chunk of %d bytes leaked\n",
                            hdr->file_name, hdr->line_num, hdr->size);
                scan += hdr->size + sizeof(tal_header_t);
            }
#if MEM_DEBUG-0 == 2
            exit(2);
#endif
        }
#endif
        tcc_free(al->buffer);
        tcc_free(al);
        al = rest;
    }
}
212 
/* Release chunk 'p' (NULL is ignored). The chain starting at 'al' is
   searched for the allocator whose buffer contains 'p'; if none does, 'p'
   is handed to tcc_free(). Chunks are not reclaimed one by one: the
   owning allocator's buffer is reused only once its live count reaches 0. */
static void tal_free_impl(TinyAlloc *al, void *p TAL_DEBUG_PARAMS)
{
    uint8_t *addr = p;

    if (p == NULL)
        return;

    /* find the allocator that owns 'p' */
    while (!(al->buffer <= addr && addr < al->buffer + al->size)) {
        if (!al->next) {
            tcc_free(p);
            return;
        }
        al = al->next;
    }

#ifdef TAL_DEBUG
    {
        tal_header_t *hdr = (tal_header_t *)p - 1;
        if (hdr->line_num < 0) {
            fprintf(stderr, "%s:%d: TAL_DEBUG: double frees chunk from\n",
                    file, line);
            fprintf(stderr, "%s:%d: %d bytes\n",
                    hdr->file_name, (int)-hdr->line_num, (int)hdr->size);
        } else {
            /* a negated line number marks the chunk as freed */
            hdr->line_num = -hdr->line_num;
        }
    }
#endif
    al->nb_allocs--;
    if (al->nb_allocs == 0)
        al->p = al->buffer;
}
239 
/* Allocate (p == NULL) or resize (p != NULL) a chunk of 'size' bytes.
   *pal is the head of the allocator chain; it is replaced when a new,
   larger allocator has to be created. Requests above an allocator's
   limit are served from the heap with tcc_malloc/tcc_realloc.
   Returns the (possibly moved) chunk.
   NOTE(review): when a chunk is moved, the old chunk's full size is
   copied, so callers are presumably expected to only grow chunks --
   confirm before using this to shrink. */
static void *tal_realloc_impl(TinyAlloc **pal, void *p, unsigned size TAL_DEBUG_PARAMS)
{
    tal_header_t *header;
    void *ret;
    int is_own;
    unsigned adj_size = (size + 3) & -4; /* round up to a multiple of 4 */
    TinyAlloc *al = *pal;

tail_call:
    /* does 'p' live inside this allocator's buffer? (false for NULL) */
    is_own = (al->buffer <= (uint8_t *)p && (uint8_t *)p < al->buffer + al->size);
    if ((!p || is_own) && size <= al->limit) {
        if (al->p - al->buffer + adj_size + sizeof(tal_header_t) < al->size) {
            /* enough room left: carve a new chunk at the bump pointer */
            header = (tal_header_t *)al->p;
            header->size = adj_size;
#ifdef TAL_DEBUG
            /* keep only the last TAL_DEBUG_FILE_LEN characters of the file name */
            { int ofs = strlen(file) - TAL_DEBUG_FILE_LEN;
            strncpy(header->file_name, file + (ofs > 0 ? ofs : 0), TAL_DEBUG_FILE_LEN);
            header->file_name[TAL_DEBUG_FILE_LEN] = 0;
            header->line_num = line; }
#endif
            ret = al->p + sizeof(tal_header_t);
            al->p += adj_size + sizeof(tal_header_t);
            if (is_own) {
                /* resize within the same allocator: copy the old payload;
                   the live count is unchanged since the old chunk is
                   replaced by the new one */
                header = (((tal_header_t *)p) - 1);
                memcpy(ret, p, header->size);
#ifdef TAL_DEBUG
                header->line_num = -header->line_num;
#endif
            } else {
                al->nb_allocs++;
            }
#ifdef TAL_INFO
            if (al->nb_peak < al->nb_allocs)
                al->nb_peak = al->nb_allocs;
            if (al->peak_p < al->p)
                al->peak_p = al->p;
            al->nb_total++;
#endif
            return ret;
        } else if (is_own) {
            /* owning allocator is full: give the chunk up here and
               allocate a fresh one starting from the head of the chain */
            al->nb_allocs--;
            ret = tal_realloc(*pal, 0, size);
            header = (((tal_header_t *)p) - 1);
            memcpy(ret, p, header->size);
#ifdef TAL_DEBUG
            header->line_num = -header->line_num;
#endif
            return ret;
        }
        if (al->next) {
            al = al->next;
        } else {
            /* end of chain and still no room: create an allocator twice
               the size of the newest one, put it in front of the chain
               (tal_new stores it in *pal) and remember it in the last
               allocator's 'top' */
            TinyAlloc *bottom = al, *next = al->top ? al->top : al;

            al = tal_new(pal, next->limit, next->size * 2);
            al->next = next;
            bottom->top = al;
        }
        goto tail_call;
    }
    if (is_own) {
        /* chunk outgrew this allocator's limit: move it to the heap */
        al->nb_allocs--;
        ret = tcc_malloc(size);
        header = (((tal_header_t *)p) - 1);
        memcpy(ret, p, header->size);
#ifdef TAL_DEBUG
        header->line_num = -header->line_num;
#endif
    } else if (al->next) {
        al = al->next;
        goto tail_call;
    } else
        /* no allocator in the chain owns 'p': plain heap (re)allocation */
        ret = tcc_realloc(p, size);
#ifdef TAL_INFO
    al->nb_missed++;
#endif
    return ret;
}
318 
319 #endif /* USE_TAL */
320 
321 /* ------------------------------------------------------------------------- */
322 /* CString handling */
/* Make room for at least 'new_size' bytes in 'cstr'. The capacity starts
   at 8 bytes (or the current capacity if larger) and is doubled until it
   is sufficient; the data pointer is always passed through tcc_realloc(). */
static void cstr_realloc(CString *cstr, int new_size)
{
    int capacity = cstr->size_allocated;

    if (capacity < 8)
        capacity = 8; /* avoid a needlessly small first allocation */
    for (; capacity < new_size; capacity *= 2)
        ;
    cstr->data = tcc_realloc(cstr->data, capacity);
    cstr->size_allocated = capacity;
}
335 
336 /* add a byte */
cstr_ccat(CString *cstr, int ch)337 ST_INLN void cstr_ccat(CString *cstr, int ch)
338 {
339     int size;
340     size = cstr->size + 1;
341     if (size > cstr->size_allocated)
342         cstr_realloc(cstr, size);
343     ((unsigned char *)cstr->data)[size - 1] = ch;
344     cstr->size = size;
345 }
346 
cstr_cat(CString *cstr, const char *str, int len)347 ST_FUNC void cstr_cat(CString *cstr, const char *str, int len)
348 {
349     int size;
350     if (len <= 0)
351         len = strlen(str) + 1 + len;
352     size = cstr->size + len;
353     if (size > cstr->size_allocated)
354         cstr_realloc(cstr, size);
355     memmove(((unsigned char *)cstr->data) + cstr->size, str, len);
356     cstr->size = size;
357 }
358 
359 /* add a wide char */
cstr_wccat(CString *cstr, int ch)360 ST_FUNC void cstr_wccat(CString *cstr, int ch)
361 {
362     int size;
363     size = cstr->size + sizeof(nwchar_t);
364     if (size > cstr->size_allocated)
365         cstr_realloc(cstr, size);
366     *(nwchar_t *)(((unsigned char *)cstr->data) + size - sizeof(nwchar_t)) = ch;
367     cstr->size = size;
368 }
369 
cstr_new(CString *cstr)370 ST_FUNC void cstr_new(CString *cstr)
371 {
372     memset(cstr, 0, sizeof(CString));
373 }
374 
375 /* free string and reset it to NULL */
cstr_free(CString *cstr)376 ST_FUNC void cstr_free(CString *cstr)
377 {
378     tcc_free(cstr->data);
379     cstr_new(cstr);
380 }
381 
/* reset string to empty: only the length is cleared, the allocated
   storage is kept for reuse */
ST_FUNC void cstr_reset(CString *cstr)
{
    cstr->size = 0;
}
387 
cstr_printf(CString *cstr, const char *fmt, ...)388 ST_FUNC int cstr_printf(CString *cstr, const char *fmt, ...)
389 {
390     va_list v;
391     int len, size;
392 
393     va_start(v, fmt);
394     len = vsnprintf(NULL, 0, fmt, v);
395     va_end(v);
396     size = cstr->size + len + 1;
397     if (size > cstr->size_allocated)
398         cstr_realloc(cstr, size);
399     va_start(v, fmt);
400     vsnprintf((char*)cstr->data + cstr->size, size, fmt, v);
401     va_end(v);
402     cstr->size += len;
403     return len;
404 }
405 
406 /* XXX: unicode ? */
add_char(CString *cstr, int c)407 static void add_char(CString *cstr, int c)
408 {
409     if (c == '\'' || c == '\"' || c == '\\') {
410         /* XXX: could be more precise if char or string */
411         cstr_ccat(cstr, '\\');
412     }
413     if (c >= 32 && c <= 126) {
414         cstr_ccat(cstr, c);
415     } else {
416         cstr_ccat(cstr, '\\');
417         if (c == '\n') {
418             cstr_ccat(cstr, 'n');
419         } else {
420             cstr_ccat(cstr, '0' + ((c >> 6) & 7));
421             cstr_ccat(cstr, '0' + ((c >> 3) & 7));
422             cstr_ccat(cstr, '0' + (c & 7));
423         }
424     }
425 }
426 
427 /* ------------------------------------------------------------------------- */
428 /* allocate a new token */
tok_alloc_new(TokenSym **pts, const char *str, int len)429 static TokenSym *tok_alloc_new(TokenSym **pts, const char *str, int len)
430 {
431     TokenSym *ts, **ptable;
432     int i;
433 
434     if (tok_ident >= SYM_FIRST_ANOM)
435         tcc_error("memory full (symbols)");
436 
437     /* expand token table if needed */
438     i = tok_ident - TOK_IDENT;
439     if ((i % TOK_ALLOC_INCR) == 0) {
440         ptable = tcc_realloc(table_ident, (i + TOK_ALLOC_INCR) * sizeof(TokenSym *));
441         table_ident = ptable;
442     }
443 
444     ts = tal_realloc(toksym_alloc, 0, sizeof(TokenSym) + len);
445     table_ident[i] = ts;
446     ts->tok = tok_ident++;
447     ts->sym_define = NULL;
448     ts->sym_label = NULL;
449     ts->sym_struct = NULL;
450     ts->sym_identifier = NULL;
451     ts->len = len;
452     ts->hash_next = NULL;
453     memcpy(ts->str, str, len);
454     ts->str[len] = '\0';
455     *pts = ts;
456     return ts;
457 }
458 
459 #define TOK_HASH_INIT 1
460 #define TOK_HASH_FUNC(h, c) ((h) + ((h) << 5) + ((h) >> 27) + (c))
461 
462 
463 /* find a token and add it if not found */
tok_alloc(const char *str, int len)464 ST_FUNC TokenSym *tok_alloc(const char *str, int len)
465 {
466     TokenSym *ts, **pts;
467     int i;
468     unsigned int h;
469 
470     h = TOK_HASH_INIT;
471     for(i=0;i<len;i++)
472         h = TOK_HASH_FUNC(h, ((unsigned char *)str)[i]);
473     h &= (TOK_HASH_SIZE - 1);
474 
475     pts = &hash_ident[h];
476     for(;;) {
477         ts = *pts;
478         if (!ts)
479             break;
480         if (ts->len == len && !memcmp(ts->str, str, len))
481             return ts;
482         pts = &(ts->hash_next);
483     }
484     return tok_alloc_new(pts, str, len);
485 }
486 
/* Return a printable representation of token 'v'; 'cv' supplies the value
   for tokens that carry one (constants, strings).
   The result points either into the shared static buffer cstr_buf (so it
   is overwritten by the next call), into cv->str.data (TOK_PPNUM and
   TOK_PPSTR) or into the symbol table (identifiers). NULL is returned
   for a token number in no known range.
   NOTE(review): the sprintf/strcpy paths write through cstr_buf.data
   without any size check and therefore rely on cstr_buf having been
   allocated large enough elsewhere -- confirm. */
/* XXX: buffer overflow */
/* XXX: float tokens */
ST_FUNC const char *get_tok_str(int v, CValue *cv)
{
    char *p;
    int i, len;

    cstr_reset(&cstr_buf);
    /* raw write pointer; only used on paths that do not call cstr_ccat/
       cstr_cat, which may move cstr_buf.data */
    p = cstr_buf.data;

    switch(v) {
    case TOK_CINT:
    case TOK_CUINT:
    case TOK_CLONG:
    case TOK_CULONG:
    case TOK_CLLONG:
    case TOK_CULLONG:
        /* XXX: not quite exact, but only useful for testing  */
#ifdef _WIN32
        sprintf(p, "%u", (unsigned)cv->i);
#else
        sprintf(p, "%llu", (unsigned long long)cv->i);
#endif
        break;
    case TOK_LCHAR:
        cstr_ccat(&cstr_buf, 'L');
        /* fall through */
    case TOK_CCHAR:
        cstr_ccat(&cstr_buf, '\'');
        add_char(&cstr_buf, cv->i);
        cstr_ccat(&cstr_buf, '\'');
        cstr_ccat(&cstr_buf, '\0');
        break;
    case TOK_PPNUM:
    case TOK_PPSTR:
        /* the token text itself is stored in the value */
        return (char*)cv->str.data;
    case TOK_LSTR:
        cstr_ccat(&cstr_buf, 'L');
        /* fall through */
    case TOK_STR:
        cstr_ccat(&cstr_buf, '\"');
        if (v == TOK_STR) {
            /* size - 1: the last stored element is not printed */
            len = cv->str.size - 1;
            for(i=0;i<len;i++)
                add_char(&cstr_buf, ((unsigned char *)cv->str.data)[i]);
        } else {
            len = (cv->str.size / sizeof(nwchar_t)) - 1;
            for(i=0;i<len;i++)
                add_char(&cstr_buf, ((nwchar_t *)cv->str.data)[i]);
        }
        cstr_ccat(&cstr_buf, '\"');
        cstr_ccat(&cstr_buf, '\0');
        break;

    /* floating point values are not printed, only a placeholder
       (len 0 makes cstr_cat copy the terminating NUL as well) */
    case TOK_CFLOAT:
        cstr_cat(&cstr_buf, "<float>", 0);
        break;
    case TOK_CDOUBLE:
	cstr_cat(&cstr_buf, "<double>", 0);
	break;
    case TOK_CLDOUBLE:
	cstr_cat(&cstr_buf, "<long double>", 0);
	break;
    case TOK_LINENUM:
	cstr_cat(&cstr_buf, "<linenumber>", 0);
	break;

    /* above tokens have value, the ones below don't */
    case TOK_LT:
        v = '<';
        goto addv;
    case TOK_GT:
        v = '>';
        goto addv;
    case TOK_DOTS:
        return strcpy(p, "...");
    case TOK_A_SHL:
        return strcpy(p, "<<=");
    case TOK_A_SAR:
        return strcpy(p, ">>=");
    case TOK_EOF:
        return strcpy(p, "<eof>");
    default:
        if (v < TOK_IDENT) {
            /* search in two bytes table */
            const unsigned char *q = tok_two_chars;
            while (*q) {
                if (q[2] == v) {
                    *p++ = q[0];
                    *p++ = q[1];
                    *p = '\0';
                    return cstr_buf.data;
                }
                q += 3;
            }
        /* not a two-character operator: values from 127 up are shown in hex */
        if (v >= 127) {
            sprintf(cstr_buf.data, "<%02x>", v);
            return cstr_buf.data;
        }
        /* single character token (also the target for TOK_LT / TOK_GT) */
        addv:
            *p++ = v;
            *p = '\0';
        } else if (v < tok_ident) {
            /* identifier or keyword: name comes from the symbol table */
            return table_ident[v - TOK_IDENT]->str;
        } else if (v >= SYM_FIRST_ANOM) {
            /* special name for anonymous symbol */
            sprintf(p, "L.%u", v - SYM_FIRST_ANOM);
        } else {
            /* should never happen */
            return NULL;
        }
        break;
    }
    return cstr_buf.data;
}
600 
601 /* return the current character, handling end of block if necessary
602    (but not stray) */
handle_eob(void)603 static int handle_eob(void)
604 {
605     BufferedFile *bf = file;
606     int len;
607 
608     /* only tries to read if really end of buffer */
609     if (bf->buf_ptr >= bf->buf_end) {
610         if (bf->fd >= 0) {
611 #if defined(PARSE_DEBUG)
612             len = 1;
613 #else
614             len = IO_BUF_SIZE;
615 #endif
616             len = read(bf->fd, bf->buffer, len);
617             if (len < 0)
618                 len = 0;
619         } else {
620             len = 0;
621         }
622         total_bytes += len;
623         bf->buf_ptr = bf->buffer;
624         bf->buf_end = bf->buffer + len;
625         *bf->buf_end = CH_EOB;
626     }
627     if (bf->buf_ptr < bf->buf_end) {
628         return bf->buf_ptr[0];
629     } else {
630         bf->buf_ptr = bf->buf_end;
631         return CH_EOF;
632     }
633 }
634 
635 /* read next char from current input file and handle end of input buffer */
inp(void)636 static inline void inp(void)
637 {
638     ch = *(++(file->buf_ptr));
639     /* end of buffer/file handling */
640     if (ch == CH_EOB)
641         ch = handle_eob();
642 }
643 
644 /* handle '\[\r]\n' */
handle_stray_noerror(void)645 static int handle_stray_noerror(void)
646 {
647     while (ch == '\\') {
648         inp();
649         if (ch == '\n') {
650             file->line_num++;
651             inp();
652         } else if (ch == '\r') {
653             inp();
654             if (ch != '\n')
655                 goto fail;
656             file->line_num++;
657             inp();
658         } else {
659         fail:
660             return 1;
661         }
662     }
663     return 0;
664 }
665 
/* Like handle_stray_noerror(), but a backslash that does not start a line
   continuation is reported as an error. */
static void handle_stray(void)
{
    int stray = handle_stray_noerror();

    if (stray)
        tcc_error("stray '\\' in program");
}
671 
672 /* skip the stray and handle the \\n case. Output an error if
673    incorrect char after the stray */
handle_stray1(uint8_t *p)674 static int handle_stray1(uint8_t *p)
675 {
676     int c;
677 
678     file->buf_ptr = p;
679     if (p >= file->buf_end) {
680         c = handle_eob();
681         if (c != '\\')
682             return c;
683         p = file->buf_ptr;
684     }
685     ch = *p;
686     if (handle_stray_noerror()) {
687         if (!(parse_flags & PARSE_FLAG_ACCEPT_STRAYS))
688             tcc_error("stray '\\' in program");
689         *--file->buf_ptr = '\\';
690     }
691     p = file->buf_ptr;
692     c = *p;
693     return c;
694 }
695 
/* PEEKC_EOB(c, p): advance p by one and load the character there into c.
   A backslash may be the end-of-buffer marker, so in that case
   handle_eob() is asked for the real character and p is re-synchronised
   with file->buf_ptr. Handles just the EOB case, but not stray: a real
   backslash is returned to the caller unchanged.
   Both arguments are evaluated several times; pass plain variables. */
#define PEEKC_EOB(c, p)\
{\
    p++;\
    c = *p;\
    if (c == '\\') {\
        file->buf_ptr = p;\
        c = handle_eob();\
        p = file->buf_ptr;\
    }\
}

/* PEEKC(c, p): like PEEKC_EOB, but handles the complicated stray case too:
   a backslash is passed to handle_stray1(), which also skips line
   continuations and may report a stray backslash. */
#define PEEKC(c, p)\
{\
    p++;\
    c = *p;\
    if (c == '\\') {\
        c = handle_stray1(p);\
        p = file->buf_ptr;\
    }\
}
718 
719 /* input with '\[\r]\n' handling. Note that this function cannot
720    handle other characters after '\', so you cannot call it inside
721    strings or comments */
minp(void)722 static void minp(void)
723 {
724     inp();
725     if (ch == '\\')
726         handle_stray();
727 }
728 
/* single line C++ comments
   On entry p points at the last character of the comment introducer (the
   callers in this file pass the second '/' or, for assembler input, the
   '#'); that character is skipped first.
   Returns a pointer to the terminating '\n' or to the end-of-file
   position; the terminator itself is not consumed. A backslash followed
   by [\r]\n continues the comment on the next line. */
static uint8_t *parse_line_comment(uint8_t *p)
{
    int c;

    p++;
    for(;;) {
        c = *p;
    redo:
        if (c == '\n' || c == CH_EOF) {
            break;
        } else if (c == '\\') {
            /* either a real backslash or the end of the buffer:
               let handle_eob() tell which */
            file->buf_ptr = p;
            c = handle_eob();
            p = file->buf_ptr;
            if (c == '\\') {
                /* real backslash: skip a following [\r]\n and count the line */
                PEEKC_EOB(c, p);
                if (c == '\n') {
                    file->line_num++;
                    PEEKC_EOB(c, p);
                } else if (c == '\r') {
                    PEEKC_EOB(c, p);
                    if (c == '\n') {
                        file->line_num++;
                        PEEKC_EOB(c, p);
                    }
                }
            } else {
                /* buffer was refilled (or EOF reached): examine the new character */
                goto redo;
            }
        } else {
            p++;
        }
    }
    return p;
}
765 
/* C comments
   On entry p points at the character before the comment body (it is
   skipped first; the caller in this file passes the '*' of the opening
   delimiter). Returns a pointer just past the closing '/'.
   Newlines inside the comment are counted in file->line_num, and a
   '\[\r]\n' sequence between the closing '*' and '/' is skipped.
   Reaching the end of the file is reported as an error. */
static uint8_t *parse_comment(uint8_t *p)
{
    int c;

    p++;
    for(;;) {
        /* fast skip loop (two characters per iteration); stops on the
           characters that need special treatment */
        for(;;) {
            c = *p;
            if (c == '\n' || c == '*' || c == '\\')
                break;
            p++;
            c = *p;
            if (c == '\n' || c == '*' || c == '\\')
                break;
            p++;
        }
        /* now we can handle all the cases */
        if (c == '\n') {
            file->line_num++;
            p++;
        } else if (c == '*') {
            p++;
            /* after a '*': look for the closing '/' */
            for(;;) {
                c = *p;
                if (c == '*') {
                    p++;
                } else if (c == '/') {
                    goto end_of_comment;
                } else if (c == '\\') {
                    /* real backslash or end of buffer */
                    file->buf_ptr = p;
                    c = handle_eob();
                    p = file->buf_ptr;
                    if (c == CH_EOF)
                        tcc_error("unexpected end of file in comment");
                    if (c == '\\') {
                        /* skip '\[\r]\n', otherwise just skip the stray */
                        while (c == '\\') {
                            PEEKC_EOB(c, p);
                            if (c == '\n') {
                                file->line_num++;
                                PEEKC_EOB(c, p);
                            } else if (c == '\r') {
                                PEEKC_EOB(c, p);
                                if (c == '\n') {
                                    file->line_num++;
                                    PEEKC_EOB(c, p);
                                }
                            } else {
                                goto after_star;
                            }
                        }
                    }
                } else {
                    /* ordinary character: the '*' did not end the comment */
                    break;
                }
            }
        after_star: ;
        } else {
            /* stray, eob or eof */
            file->buf_ptr = p;
            c = handle_eob();
            p = file->buf_ptr;
            if (c == CH_EOF) {
                tcc_error("unexpected end of file in comment");
            } else if (c == '\\') {
                /* a real backslash inside the comment is just skipped */
                p++;
            }
        }
    }
 end_of_comment:
    p++;
    return p;
}
841 
set_idnum(int c, int val)842 ST_FUNC int set_idnum(int c, int val)
843 {
844     int prev = isidnum_table[c - CH_EOF];
845     isidnum_table[c - CH_EOF] = val;
846     return prev;
847 }
848 
849 #define cinp minp
850 
skip_spaces(void)851 static inline void skip_spaces(void)
852 {
853     while (isidnum_table[ch - CH_EOF] & IS_SPC)
854         cinp();
855 }
856 
check_space(int t, int *spc)857 static inline int check_space(int t, int *spc)
858 {
859     if (t < 256 && (isidnum_table[t - CH_EOF] & IS_SPC)) {
860         if (*spc)
861             return 1;
862         *spc = 1;
863     } else
864         *spc = 0;
865     return 0;
866 }
867 
868 /* parse a string without interpreting escapes */
parse_pp_string(uint8_t *p, int sep, CString *str)869 static uint8_t *parse_pp_string(uint8_t *p,
870                                 int sep, CString *str)
871 {
872     int c;
873     p++;
874     for(;;) {
875         c = *p;
876         if (c == sep) {
877             break;
878         } else if (c == '\\') {
879             file->buf_ptr = p;
880             c = handle_eob();
881             p = file->buf_ptr;
882             if (c == CH_EOF) {
883             unterminated_string:
884                 /* XXX: indicate line number of start of string */
885                 tcc_error("missing terminating %c character", sep);
886             } else if (c == '\\') {
887                 /* escape : just skip \[\r]\n */
888                 PEEKC_EOB(c, p);
889                 if (c == '\n') {
890                     file->line_num++;
891                     p++;
892                 } else if (c == '\r') {
893                     PEEKC_EOB(c, p);
894                     if (c != '\n')
895                         expect("'\n' after '\r'");
896                     file->line_num++;
897                     p++;
898                 } else if (c == CH_EOF) {
899                     goto unterminated_string;
900                 } else {
901                     if (str) {
902                         cstr_ccat(str, '\\');
903                         cstr_ccat(str, c);
904                     }
905                     p++;
906                 }
907             }
908         } else if (c == '\n') {
909             file->line_num++;
910             goto add_char;
911         } else if (c == '\r') {
912             PEEKC_EOB(c, p);
913             if (c != '\n') {
914                 if (str)
915                     cstr_ccat(str, '\r');
916             } else {
917                 file->line_num++;
918                 goto add_char;
919             }
920         } else {
921         add_char:
922             if (str)
923                 cstr_ccat(str, c);
924             p++;
925         }
926     }
927     p++;
928     return p;
929 }
930 
/* skip block of text until #else, #elif or #endif. skip also pairs of
   #if/#endif
   Scanning starts at file->buf_ptr; on return file->buf_ptr is just after
   the directive name that ended the skipped block and 'tok' holds that
   directive. Strings and comments are skipped so that a '#' inside them
   is not taken for a directive. Reaching the end of the file is
   reported as a missing #endif. */
static void preprocess_skip(void)
{
    /* a: nesting depth of #if/#ifdef/#ifndef seen inside the skipped block
       start_of_line: only white space has been seen on the current line
       in_warn_or_error: the current line is an #error/#warning directive,
       whose text is free-form (quotes and slashes are not parsed) */
    int a, start_of_line, c, in_warn_or_error;
    uint8_t *p;

    p = file->buf_ptr;
    a = 0;
redo_start:
    start_of_line = 1;
    in_warn_or_error = 0;
    for(;;) {
    redo_no_start:
        c = *p;
        switch(c) {
        case ' ':
        case '\t':
        case '\f':
        case '\v':
        case '\r':
            /* white space keeps start_of_line unchanged */
            p++;
            goto redo_no_start;
        case '\n':
            file->line_num++;
            p++;
            goto redo_start;
        case '\\':
            /* real backslash or end of buffer */
            file->buf_ptr = p;
            c = handle_eob();
            if (c == CH_EOF) {
                expect("#endif");
            } else if (c == '\\') {
                /* skip line continuations; a stray backslash is
                   tolerated here (the result is ignored) */
                ch = file->buf_ptr[0];
                handle_stray_noerror();
            }
            p = file->buf_ptr;
            goto redo_no_start;
        /* skip strings */
        case '\"':
        case '\'':
            if (in_warn_or_error)
                goto _default;
            p = parse_pp_string(p, c, NULL);
            break;
        /* skip comments */
        case '/':
            if (in_warn_or_error)
                goto _default;
            file->buf_ptr = p;
            ch = *p;
            minp();
            p = file->buf_ptr;
            if (ch == '*') {
                p = parse_comment(p);
            } else if (ch == '/') {
                p = parse_line_comment(p);
            }
            break;
        case '#':
            p++;
            if (start_of_line) {
                /* possible directive: read its name */
                file->buf_ptr = p;
                next_nomacro();
                p = file->buf_ptr;
                /* only a directive at nesting depth 0 ends the skipped block */
                if (a == 0 &&
                    (tok == TOK_ELSE || tok == TOK_ELIF || tok == TOK_ENDIF))
                    goto the_end;
                if (tok == TOK_IF || tok == TOK_IFDEF || tok == TOK_IFNDEF)
                    a++;
                else if (tok == TOK_ENDIF)
                    a--;
                else if( tok == TOK_ERROR || tok == TOK_WARNING)
                    in_warn_or_error = 1;
                else if (tok == TOK_LINEFEED)
                    goto redo_start;
                else if (parse_flags & PARSE_FLAG_ASM_FILE)
                    /* in assembler input an unrecognised '#' starts a
                       line comment; p - 1 points back at the character
                       before the current position */
                    p = parse_line_comment(p - 1);
            } else if (parse_flags & PARSE_FLAG_ASM_FILE)
                p = parse_line_comment(p - 1);
            break;
_default:
        default:
            p++;
            break;
        }
        start_of_line = 0;
    }
 the_end: ;
    file->buf_ptr = p;
}
1022 
#if 0
/* Compiled out.  Computes how many 'int' slots the token starting at
   'p' occupies: one for the token code plus its payload words.
   NOTE(review): the string case sizes the payload with sizeof(CString),
   while tok_str_add2 stores a size word followed by the raw bytes --
   confirm the layout before re-enabling this helper. */
static inline int tok_size(const int *p)
{
    int payload;

    switch(*p) {
    case TOK_CINT:
    case TOK_CUINT:
    case TOK_CCHAR:
    case TOK_LCHAR:
    case TOK_CFLOAT:
    case TOK_LINENUM:
        /* 4 bytes */
        payload = 1;
        break;
    case TOK_STR:
    case TOK_LSTR:
    case TOK_PPNUM:
    case TOK_PPSTR:
        payload = (sizeof(CString) + ((CString *)(p+1))->size + 3) >> 2;
        break;
    case TOK_CLONG:
    case TOK_CULONG:
        payload = LONG_SIZE / 4;
        break;
    case TOK_CDOUBLE:
    case TOK_CLLONG:
    case TOK_CULLONG:
        payload = 2;
        break;
    case TOK_CLDOUBLE:
        payload = LDOUBLE_SIZE / 4;
        break;
    default:
        payload = 0;
        break;
    }
    return 1 + payload;
}
#endif
1056 
1057 /* token string handling */
tok_str_new(TokenString *s)1058 ST_INLN void tok_str_new(TokenString *s)
1059 {
1060     s->str = NULL;
1061     s->len = s->lastlen = 0;
1062     s->allocated_len = 0;
1063     s->last_line_num = -1;
1064 }
1065 
tok_str_alloc(void)1066 ST_FUNC TokenString *tok_str_alloc(void)
1067 {
1068     TokenString *str = tal_realloc(tokstr_alloc, 0, sizeof *str);
1069     tok_str_new(str);
1070     return str;
1071 }
1072 
tok_str_dup(TokenString *s)1073 ST_FUNC int *tok_str_dup(TokenString *s)
1074 {
1075     int *str;
1076 
1077     str = tal_realloc(tokstr_alloc, 0, s->len * sizeof(int));
1078     memcpy(str, s->str, s->len * sizeof(int));
1079     return str;
1080 }
1081 
tok_str_free_str(int *str)1082 ST_FUNC void tok_str_free_str(int *str)
1083 {
1084     tal_free(tokstr_alloc, str);
1085 }
1086 
tok_str_free(TokenString *str)1087 ST_FUNC void tok_str_free(TokenString *str)
1088 {
1089     tok_str_free_str(str->str);
1090     tal_free(tokstr_alloc, str);
1091 }
1092 
tok_str_realloc(TokenString *s, int new_size)1093 ST_FUNC int *tok_str_realloc(TokenString *s, int new_size)
1094 {
1095     int *str, size;
1096 
1097     size = s->allocated_len;
1098     if (size < 16)
1099         size = 16;
1100     while (size < new_size)
1101         size = size * 2;
1102     if (size > s->allocated_len) {
1103         str = tal_realloc(tokstr_alloc, s->str, size * sizeof(int));
1104         s->allocated_len = size;
1105         s->str = str;
1106     }
1107     return s->str;
1108 }
1109 
tok_str_add(TokenString *s, int t)1110 ST_FUNC void tok_str_add(TokenString *s, int t)
1111 {
1112     int len, *str;
1113 
1114     len = s->len;
1115     str = s->str;
1116     if (len >= s->allocated_len)
1117         str = tok_str_realloc(s, len + 1);
1118     str[len++] = t;
1119     s->len = len;
1120 }
1121 
begin_macro(TokenString *str, int alloc)1122 ST_FUNC void begin_macro(TokenString *str, int alloc)
1123 {
1124     str->alloc = alloc;
1125     str->prev = macro_stack;
1126     str->prev_ptr = macro_ptr;
1127     str->save_line_num = file->line_num;
1128     macro_ptr = str->str;
1129     macro_stack = str;
1130 }
1131 
end_macro(void)1132 ST_FUNC void end_macro(void)
1133 {
1134     TokenString *str = macro_stack;
1135     macro_stack = str->prev;
1136     macro_ptr = str->prev_ptr;
1137     file->line_num = str->save_line_num;
1138     if (str->alloc != 0) {
1139         if (str->alloc == 2)
1140             str->str = NULL; /* don't free */
1141         tok_str_free(str);
1142     }
1143 }
1144 
tok_str_add2(TokenString *s, int t, CValue *cv)1145 static void tok_str_add2(TokenString *s, int t, CValue *cv)
1146 {
1147     int len, *str;
1148 
1149     len = s->lastlen = s->len;
1150     str = s->str;
1151 
1152     /* allocate space for worst case */
1153     if (len + TOK_MAX_SIZE >= s->allocated_len)
1154         str = tok_str_realloc(s, len + TOK_MAX_SIZE + 1);
1155     str[len++] = t;
1156     switch(t) {
1157     case TOK_CINT:
1158     case TOK_CUINT:
1159     case TOK_CCHAR:
1160     case TOK_LCHAR:
1161     case TOK_CFLOAT:
1162     case TOK_LINENUM:
1163 #if LONG_SIZE == 4
1164     case TOK_CLONG:
1165     case TOK_CULONG:
1166 #endif
1167         str[len++] = cv->tab[0];
1168         break;
1169     case TOK_PPNUM:
1170     case TOK_PPSTR:
1171     case TOK_STR:
1172     case TOK_LSTR:
1173         {
1174             /* Insert the string into the int array. */
1175             size_t nb_words =
1176                 1 + (cv->str.size + sizeof(int) - 1) / sizeof(int);
1177             if (len + nb_words >= s->allocated_len)
1178                 str = tok_str_realloc(s, len + nb_words + 1);
1179             str[len] = cv->str.size;
1180             memcpy(&str[len + 1], cv->str.data, cv->str.size);
1181             len += nb_words;
1182         }
1183         break;
1184     case TOK_CDOUBLE:
1185     case TOK_CLLONG:
1186     case TOK_CULLONG:
1187 #if LONG_SIZE == 8
1188     case TOK_CLONG:
1189     case TOK_CULONG:
1190 #endif
1191 #if LDOUBLE_SIZE == 8
1192     case TOK_CLDOUBLE:
1193 #endif
1194         str[len++] = cv->tab[0];
1195         str[len++] = cv->tab[1];
1196         break;
1197 #if LDOUBLE_SIZE == 12
1198     case TOK_CLDOUBLE:
1199         str[len++] = cv->tab[0];
1200         str[len++] = cv->tab[1];
1201         str[len++] = cv->tab[2];
1202 #elif LDOUBLE_SIZE == 16
1203     case TOK_CLDOUBLE:
1204         str[len++] = cv->tab[0];
1205         str[len++] = cv->tab[1];
1206         str[len++] = cv->tab[2];
1207         str[len++] = cv->tab[3];
1208 #elif LDOUBLE_SIZE != 8
1209 #error add long double size support
1210 #endif
1211         break;
1212     default:
1213         break;
1214     }
1215     s->len = len;
1216 }
1217 
1218 /* add the current parse token in token string 's' */
tok_str_add_tok(TokenString *s)1219 ST_FUNC void tok_str_add_tok(TokenString *s)
1220 {
1221     CValue cval;
1222 
1223     /* save line number info */
1224     if (file->line_num != s->last_line_num) {
1225         s->last_line_num = file->line_num;
1226         cval.i = s->last_line_num;
1227         tok_str_add2(s, TOK_LINENUM, &cval);
1228     }
1229     tok_str_add2(s, tok, &tokc);
1230 }
1231 
1232 /* get a token from an integer array and increment pointer. */
TOK_GET(int *t, const int **pp, CValue *cv)1233 static inline void TOK_GET(int *t, const int **pp, CValue *cv)
1234 {
1235     const int *p = *pp;
1236     int n, *tab;
1237 
1238     tab = cv->tab;
1239     switch(*t = *p++) {
1240 #if LONG_SIZE == 4
1241     case TOK_CLONG:
1242 #endif
1243     case TOK_CINT:
1244     case TOK_CCHAR:
1245     case TOK_LCHAR:
1246     case TOK_LINENUM:
1247         cv->i = *p++;
1248         break;
1249 #if LONG_SIZE == 4
1250     case TOK_CULONG:
1251 #endif
1252     case TOK_CUINT:
1253         cv->i = (unsigned)*p++;
1254         break;
1255     case TOK_CFLOAT:
1256 	tab[0] = *p++;
1257 	break;
1258     case TOK_STR:
1259     case TOK_LSTR:
1260     case TOK_PPNUM:
1261     case TOK_PPSTR:
1262         cv->str.size = *p++;
1263         cv->str.data = p;
1264         p += (cv->str.size + sizeof(int) - 1) / sizeof(int);
1265         break;
1266     case TOK_CDOUBLE:
1267     case TOK_CLLONG:
1268     case TOK_CULLONG:
1269 #if LONG_SIZE == 8
1270     case TOK_CLONG:
1271     case TOK_CULONG:
1272 #endif
1273         n = 2;
1274         goto copy;
1275     case TOK_CLDOUBLE:
1276 #if LDOUBLE_SIZE == 16
1277         n = 4;
1278 #elif LDOUBLE_SIZE == 12
1279         n = 3;
1280 #elif LDOUBLE_SIZE == 8
1281         n = 2;
1282 #else
1283 # error add long double size support
1284 #endif
1285     copy:
1286         do
1287             *tab++ = *p++;
1288         while (--n);
1289         break;
1290     default:
1291         break;
1292     }
1293     *pp = p;
1294 }
1295 
macro_is_equal(const int *a, const int *b)1296 static int macro_is_equal(const int *a, const int *b)
1297 {
1298     CValue cv;
1299     int t;
1300 
1301     if (!a || !b)
1302         return 1;
1303 
1304     while (*a && *b) {
1305         /* first time preallocate macro_equal_buf, next time only reset position to start */
1306         cstr_reset(&macro_equal_buf);
1307         TOK_GET(&t, &a, &cv);
1308         cstr_cat(&macro_equal_buf, get_tok_str(t, &cv), 0);
1309         TOK_GET(&t, &b, &cv);
1310         if (strcmp(macro_equal_buf.data, get_tok_str(t, &cv)))
1311             return 0;
1312     }
1313     return !(*a || *b);
1314 }
1315 
1316 /* defines handling */
define_push(int v, int macro_type, int *str, Sym *first_arg)1317 ST_INLN void define_push(int v, int macro_type, int *str, Sym *first_arg)
1318 {
1319     Sym *s, *o;
1320 
1321     o = define_find(v);
1322     s = sym_push2(&define_stack, v, macro_type, 0);
1323     s->d = str;
1324     s->next = first_arg;
1325     table_ident[v - TOK_IDENT]->sym_define = s;
1326 
1327     if (o && !macro_is_equal(o->d, s->d))
1328 	tcc_warning("%s redefined", get_tok_str(v, NULL));
1329 }
1330 
1331 /* undefined a define symbol. Its name is just set to zero */
define_undef(Sym *s)1332 ST_FUNC void define_undef(Sym *s)
1333 {
1334     int v = s->v;
1335     if (v >= TOK_IDENT && v < tok_ident)
1336         table_ident[v - TOK_IDENT]->sym_define = NULL;
1337 }
1338 
define_find(int v)1339 ST_INLN Sym *define_find(int v)
1340 {
1341     v -= TOK_IDENT;
1342     if ((unsigned)v >= (unsigned)(tok_ident - TOK_IDENT))
1343         return NULL;
1344     return table_ident[v]->sym_define;
1345 }
1346 
1347 /* free define stack until top reaches 'b' */
free_defines(Sym *b)1348 ST_FUNC void free_defines(Sym *b)
1349 {
1350     while (define_stack != b) {
1351         Sym *top = define_stack;
1352         define_stack = top->prev;
1353         tok_str_free_str(top->d);
1354         define_undef(top);
1355         sym_free(top);
1356     }
1357 }
1358 
1359 /* label lookup */
label_find(int v)1360 ST_FUNC Sym *label_find(int v)
1361 {
1362     v -= TOK_IDENT;
1363     if ((unsigned)v >= (unsigned)(tok_ident - TOK_IDENT))
1364         return NULL;
1365     return table_ident[v]->sym_label;
1366 }
1367 
label_push(Sym **ptop, int v, int flags)1368 ST_FUNC Sym *label_push(Sym **ptop, int v, int flags)
1369 {
1370     Sym *s, **ps;
1371     s = sym_push2(ptop, v, 0, 0);
1372     s->r = flags;
1373     ps = &table_ident[v - TOK_IDENT]->sym_label;
1374     if (ptop == &global_label_stack) {
1375         /* modify the top most local identifier, so that
1376            sym_identifier will point to 's' when popped */
1377         while (*ps != NULL)
1378             ps = &(*ps)->prev_tok;
1379     }
1380     s->prev_tok = *ps;
1381     *ps = s;
1382     return s;
1383 }
1384 
1385 /* pop labels until element last is reached. Look if any labels are
1386    undefined. Define symbols if '&&label' was used. */
label_pop(Sym **ptop, Sym *slast, int keep)1387 ST_FUNC void label_pop(Sym **ptop, Sym *slast, int keep)
1388 {
1389     Sym *s, *s1;
1390     for(s = *ptop; s != slast; s = s1) {
1391         s1 = s->prev;
1392         if (s->r == LABEL_DECLARED) {
1393             tcc_warning("label '%s' declared but not used", get_tok_str(s->v, NULL));
1394         } else if (s->r == LABEL_FORWARD) {
1395                 tcc_error("label '%s' used but not defined",
1396                       get_tok_str(s->v, NULL));
1397         } else {
1398             if (s->c) {
1399                 /* define corresponding symbol. A size of
1400                    1 is put. */
1401                 put_extern_sym(s, cur_text_section, s->jnext, 1);
1402             }
1403         }
1404         /* remove label */
1405         if (s->r != LABEL_GONE)
1406             table_ident[s->v - TOK_IDENT]->sym_label = s->prev_tok;
1407         if (!keep)
1408             sym_free(s);
1409         else
1410             s->r = LABEL_GONE;
1411     }
1412     if (!keep)
1413         *ptop = slast;
1414 }
1415 
1416 /* fake the nth "#if defined test_..." for tcc -dt -run */
maybe_run_test(TCCState *s)1417 static void maybe_run_test(TCCState *s)
1418 {
1419     const char *p;
1420     if (s->include_stack_ptr != s->include_stack)
1421         return;
1422     p = get_tok_str(tok, NULL);
1423     if (0 != memcmp(p, "test_", 5))
1424         return;
1425     if (0 != --s->run_test)
1426         return;
1427     fprintf(s->ppfp, "\n[%s]\n" + !(s->dflag & 32), p), fflush(s->ppfp);
1428     define_push(tok, MACRO_OBJ, NULL, NULL);
1429 }
1430 
1431 /* eval an expression for #if/#elif */
expr_preprocess(void)1432 static int expr_preprocess(void)
1433 {
1434     int c, t;
1435     TokenString *str;
1436 
1437     str = tok_str_alloc();
1438     pp_expr = 1;
1439     while (tok != TOK_LINEFEED && tok != TOK_EOF) {
1440         next(); /* do macro subst */
1441         if (tok == TOK_DEFINED) {
1442             next_nomacro();
1443             t = tok;
1444             if (t == '(')
1445                 next_nomacro();
1446             if (tok < TOK_IDENT)
1447                 expect("identifier");
1448             if (tcc_state->run_test)
1449                 maybe_run_test(tcc_state);
1450             c = define_find(tok) != 0;
1451             if (t == '(') {
1452                 next_nomacro();
1453                 if (tok != ')')
1454                     expect("')'");
1455             }
1456             tok = TOK_CINT;
1457             tokc.i = c;
1458         } else if (tok >= TOK_IDENT) {
1459             /* if undefined macro */
1460             tok = TOK_CINT;
1461             tokc.i = 0;
1462         }
1463         tok_str_add_tok(str);
1464     }
1465     pp_expr = 0;
1466     tok_str_add(str, -1); /* simulate end of file */
1467     tok_str_add(str, 0);
1468     /* now evaluate C constant expression */
1469     begin_macro(str, 1);
1470     next();
1471     c = expr_const();
1472     end_macro();
1473     return c != 0;
1474 }
1475 
1476 
1477 /* parse after #define */
parse_define(void)1478 ST_FUNC void parse_define(void)
1479 {
1480     Sym *s, *first, **ps;
1481     int v, t, varg, is_vaargs, spc;
1482     int saved_parse_flags = parse_flags;
1483 
1484     v = tok;
1485     if (v < TOK_IDENT || v == TOK_DEFINED)
1486         tcc_error("invalid macro name '%s'", get_tok_str(tok, &tokc));
1487     /* XXX: should check if same macro (ANSI) */
1488     first = NULL;
1489     t = MACRO_OBJ;
1490     /* We have to parse the whole define as if not in asm mode, in particular
1491        no line comment with '#' must be ignored.  Also for function
1492        macros the argument list must be parsed without '.' being an ID
1493        character.  */
1494     parse_flags = ((parse_flags & ~PARSE_FLAG_ASM_FILE) | PARSE_FLAG_SPACES);
1495     /* '(' must be just after macro definition for MACRO_FUNC */
1496     next_nomacro_spc();
1497     if (tok == '(') {
1498         int dotid = set_idnum('.', 0);
1499         next_nomacro();
1500         ps = &first;
1501         if (tok != ')') for (;;) {
1502             varg = tok;
1503             next_nomacro();
1504             is_vaargs = 0;
1505             if (varg == TOK_DOTS) {
1506                 varg = TOK___VA_ARGS__;
1507                 is_vaargs = 1;
1508             } else if (tok == TOK_DOTS && gnu_ext) {
1509                 is_vaargs = 1;
1510                 next_nomacro();
1511             }
1512             if (varg < TOK_IDENT)
1513         bad_list:
1514                 tcc_error("bad macro parameter list");
1515             s = sym_push2(&define_stack, varg | SYM_FIELD, is_vaargs, 0);
1516             *ps = s;
1517             ps = &s->next;
1518             if (tok == ')')
1519                 break;
1520             if (tok != ',' || is_vaargs)
1521                 goto bad_list;
1522             next_nomacro();
1523         }
1524         next_nomacro_spc();
1525         t = MACRO_FUNC;
1526         set_idnum('.', dotid);
1527     }
1528 
1529     tokstr_buf.len = 0;
1530     spc = 2;
1531     parse_flags |= PARSE_FLAG_ACCEPT_STRAYS | PARSE_FLAG_SPACES | PARSE_FLAG_LINEFEED;
1532     /* The body of a macro definition should be parsed such that identifiers
1533        are parsed like the file mode determines (i.e. with '.' being an
1534        ID character in asm mode).  But '#' should be retained instead of
1535        regarded as line comment leader, so still don't set ASM_FILE
1536        in parse_flags. */
1537     while (tok != TOK_LINEFEED && tok != TOK_EOF) {
1538         /* remove spaces around ## and after '#' */
1539         if (TOK_TWOSHARPS == tok) {
1540             if (2 == spc)
1541                 goto bad_twosharp;
1542             if (1 == spc)
1543                 --tokstr_buf.len;
1544             spc = 3;
1545 	    tok = TOK_PPJOIN;
1546         } else if ('#' == tok) {
1547             spc = 4;
1548         } else if (check_space(tok, &spc)) {
1549             goto skip;
1550         }
1551         tok_str_add2(&tokstr_buf, tok, &tokc);
1552     skip:
1553         next_nomacro_spc();
1554     }
1555 
1556     parse_flags = saved_parse_flags;
1557     if (spc == 1)
1558         --tokstr_buf.len; /* remove trailing space */
1559     tok_str_add(&tokstr_buf, 0);
1560     if (3 == spc)
1561 bad_twosharp:
1562         tcc_error("'##' cannot appear at either end of macro");
1563     define_push(v, t, tok_str_dup(&tokstr_buf), first);
1564 }
1565 
search_cached_include(TCCState *s1, const char *filename, int add)1566 static CachedInclude *search_cached_include(TCCState *s1, const char *filename, int add)
1567 {
1568     const unsigned char *s;
1569     unsigned int h;
1570     CachedInclude *e;
1571     int i;
1572 
1573     h = TOK_HASH_INIT;
1574     s = (unsigned char *) filename;
1575     while (*s) {
1576 #ifdef _WIN32
1577         h = TOK_HASH_FUNC(h, toup(*s));
1578 #else
1579         h = TOK_HASH_FUNC(h, *s);
1580 #endif
1581         s++;
1582     }
1583     h &= (CACHED_INCLUDES_HASH_SIZE - 1);
1584 
1585     i = s1->cached_includes_hash[h];
1586     for(;;) {
1587         if (i == 0)
1588             break;
1589         e = s1->cached_includes[i - 1];
1590         if (0 == PATHCMP(e->filename, filename))
1591             return e;
1592         i = e->hash_next;
1593     }
1594     if (!add)
1595         return NULL;
1596 
1597     e = tcc_malloc(sizeof(CachedInclude) + strlen(filename));
1598     strcpy(e->filename, filename);
1599     e->ifndef_macro = e->once = 0;
1600     dynarray_add(&s1->cached_includes, &s1->nb_cached_includes, e);
1601     /* add in hash table */
1602     e->hash_next = s1->cached_includes_hash[h];
1603     s1->cached_includes_hash[h] = s1->nb_cached_includes;
1604 #ifdef INC_DEBUG
1605     printf("adding cached '%s'\n", filename);
1606 #endif
1607     return e;
1608 }
1609 
/* Parse the remainder of a '#pragma' line.

   Handled here:
     push_macro("NAME") / pop_macro("NAME")
     once
     pack(...)
     comment(lib, "...") / comment(option, "...")
   When the output type is TCC_OUTPUT_PREPROCESS, every pragma other
   than push_macro/pop_macro/once is pushed back into the token stream
   instead of being interpreted.  Any other pragma is ignored, with a
   warning if s1->warn_unsupported is set.  Malformed directives are
   reported through tcc_error(). */
static void pragma_parse(TCCState *s1)
{
    next_nomacro();
    if (tok == TOK_push_macro || tok == TOK_pop_macro) {
        int t = tok, v;
        Sym *s;

        if (next(), tok != '(')
            goto pragma_err;
        if (next(), tok != TOK_STR)
            goto pragma_err;
        /* the string size includes the terminating NUL, hence the -1 */
        v = tok_alloc(tokc.str.data, tokc.str.size - 1)->tok;
        if (next(), tok != ')')
            goto pragma_err;
        if (t == TOK_push_macro) {
            /* an undefined macro gets a placeholder definition (NULL
               body) so that there is a symbol to mark */
            while (NULL == (s = define_find(v)))
                define_push(v, 0, NULL, NULL);
            s->type.ref = s; /* set push boundary */
        } else {
            /* find the most recent push boundary for this macro */
            for (s = define_stack; s; s = s->prev)
                if (s->v == v && s->type.ref == s) {
                    s->type.ref = NULL;
                    break;
                }
        }
        if (s)
            /* a symbol without body stands for "not defined" */
            table_ident[v - TOK_IDENT]->sym_define = s->d ? s : NULL;
        else
            tcc_warning("unbalanced #pragma pop_macro");
        pp_debug_tok = t, pp_debug_symv = v;

    } else if (tok == TOK_once) {
        search_cached_include(s1, file->filename, 1)->once = pp_once;

    } else if (s1->output_type == TCC_OUTPUT_PREPROCESS) {
        /* tcc -E: keep pragmas below unchanged */
        unget_tok(' ');
        unget_tok(TOK_PRAGMA);
        unget_tok('#');
        unget_tok(TOK_LINEFEED);

    } else if (tok == TOK_pack) {
        /* This may be:
           #pragma pack(1) // set
           #pragma pack() // reset to default
           #pragma pack(push,1) // push & set
           #pragma pack(pop) // restore previous */
        next();
        skip('(');
        if (tok == TOK_ASM_pop) {
            next();
            if (s1->pack_stack_ptr <= s1->pack_stack) {
            stk_error:
                tcc_error("out of pack stack");
            }
            s1->pack_stack_ptr--;
        } else {
            int val = 0;
            if (tok != ')') {
                if (tok == TOK_ASM_push) {
                    next();
                    if (s1->pack_stack_ptr >= s1->pack_stack + PACK_STACK_SIZE - 1)
                        goto stk_error;
                    s1->pack_stack_ptr++;
                    skip(',');
                }
                if (tok != TOK_CINT)
                    goto pragma_err;
                val = tokc.i;
                /* only 1, 2, 4, 8 and 16 are accepted */
                if (val < 1 || val > 16 || (val & (val - 1)) != 0)
                    goto pragma_err;
                next();
            }
            *s1->pack_stack_ptr = val;
        }
        if (tok != ')')
            goto pragma_err;

    } else if (tok == TOK_comment) {
        char *p; int t;
        next();
        skip('(');
        t = tok;
        next();
        skip(',');
        if (tok != TOK_STR)
            goto pragma_err;
        /* copy the string before reading the next token */
        p = tcc_strdup((char *)tokc.str.data);
        next();
        if (tok != ')') {
            /* do not leak the copy on the error path */
            tcc_free(p);
            goto pragma_err;
        }
        if (t == TOK_lib) {
            /* the string is now held by the pragma_libs array */
            dynarray_add(&s1->pragma_libs, &s1->nb_pragma_libs, p);
        } else {
            if (t == TOK_option)
                tcc_set_options(s1, p);
            tcc_free(p);
        }

    } else if (s1->warn_unsupported) {
        tcc_warning("#pragma %s is ignored", get_tok_str(tok, &tokc));
    }
    return;

pragma_err:
    tcc_error("malformed #pragma directive");
    return;
}
1718 
1719 /* is_bof is true if first non space token at beginning of file */
preprocess(int is_bof)1720 ST_FUNC void preprocess(int is_bof)
1721 {
1722     TCCState *s1 = tcc_state;
1723     int i, c, n, saved_parse_flags;
1724     char buf[1024], *q;
1725     Sym *s;
1726 
1727     saved_parse_flags = parse_flags;
1728     parse_flags = PARSE_FLAG_PREPROCESS
1729         | PARSE_FLAG_TOK_NUM
1730         | PARSE_FLAG_TOK_STR
1731         | PARSE_FLAG_LINEFEED
1732         | (parse_flags & PARSE_FLAG_ASM_FILE)
1733         ;
1734 
1735     next_nomacro();
1736  redo:
1737     switch(tok) {
1738     case TOK_DEFINE:
1739         pp_debug_tok = tok;
1740         next_nomacro();
1741         pp_debug_symv = tok;
1742         parse_define();
1743         break;
1744     case TOK_UNDEF:
1745         pp_debug_tok = tok;
1746         next_nomacro();
1747         pp_debug_symv = tok;
1748         s = define_find(tok);
1749         /* undefine symbol by putting an invalid name */
1750         if (s)
1751             define_undef(s);
1752         break;
1753     case TOK_INCLUDE:
1754     case TOK_INCLUDE_NEXT:
1755         ch = file->buf_ptr[0];
1756         /* XXX: incorrect if comments : use next_nomacro with a special mode */
1757         skip_spaces();
1758         if (ch == '<') {
1759             c = '>';
1760             goto read_name;
1761         } else if (ch == '\"') {
1762             c = ch;
1763         read_name:
1764             inp();
1765             q = buf;
1766             while (ch != c && ch != '\n' && ch != CH_EOF) {
1767                 if ((q - buf) < sizeof(buf) - 1)
1768                     *q++ = ch;
1769                 if (ch == '\\') {
1770                     if (handle_stray_noerror() == 0)
1771                         --q;
1772                 } else
1773                     inp();
1774             }
1775             *q = '\0';
1776             minp();
1777 #if 0
1778             /* eat all spaces and comments after include */
1779             /* XXX: slightly incorrect */
1780             while (ch1 != '\n' && ch1 != CH_EOF)
1781                 inp();
1782 #endif
1783         } else {
1784 	    int len;
1785             /* computed #include : concatenate everything up to linefeed,
1786 	       the result must be one of the two accepted forms.
1787 	       Don't convert pp-tokens to tokens here.  */
1788 	    parse_flags = (PARSE_FLAG_PREPROCESS
1789 			   | PARSE_FLAG_LINEFEED
1790 			   | (parse_flags & PARSE_FLAG_ASM_FILE));
1791             next();
1792             buf[0] = '\0';
1793 	    while (tok != TOK_LINEFEED) {
1794 		pstrcat(buf, sizeof(buf), get_tok_str(tok, &tokc));
1795 		next();
1796 	    }
1797 	    len = strlen(buf);
1798 	    /* check syntax and remove '<>|""' */
1799 	    if ((len < 2 || ((buf[0] != '"' || buf[len-1] != '"') &&
1800 			     (buf[0] != '<' || buf[len-1] != '>'))))
1801 	        tcc_error("'#include' expects \"FILENAME\" or <FILENAME>");
1802 	    c = buf[len-1];
1803 	    memmove(buf, buf + 1, len - 2);
1804 	    buf[len - 2] = '\0';
1805         }
1806 
1807         if (s1->include_stack_ptr >= s1->include_stack + INCLUDE_STACK_SIZE)
1808             tcc_error("#include recursion too deep");
1809         /* push current file on stack */
1810         *s1->include_stack_ptr++ = file;
1811         i = tok == TOK_INCLUDE_NEXT ? file->include_next_index: 0;
1812         n = 2 + s1->nb_include_paths + s1->nb_sysinclude_paths;
1813         for (; i < n; ++i) {
1814             char buf1[sizeof file->filename];
1815             CachedInclude *e;
1816             const char *path;
1817 
1818             if (i == 0) {
1819                 /* check absolute include path */
1820                 if (!IS_ABSPATH(buf))
1821                     continue;
1822                 buf1[0] = 0;
1823 
1824             } else if (i == 1) {
1825                 /* search in file's dir if "header.h" */
1826                 if (c != '\"')
1827                     continue;
1828                 /* https://savannah.nongnu.org/bugs/index.php?50847 */
1829                 path = file->true_filename;
1830                 pstrncpy(buf1, path, tcc_basename(path) - path);
1831 
1832             } else {
1833                 /* search in all the include paths */
1834                 int j = i - 2, k = j - s1->nb_include_paths;
1835                 path = k < 0 ? s1->include_paths[j] : s1->sysinclude_paths[k];
1836                 pstrcpy(buf1, sizeof(buf1), path);
1837                 pstrcat(buf1, sizeof(buf1), "/");
1838             }
1839 
1840             pstrcat(buf1, sizeof(buf1), buf);
1841             e = search_cached_include(s1, buf1, 0);
1842             if (e && (define_find(e->ifndef_macro) || e->once == pp_once)) {
1843                 /* no need to parse the include because the 'ifndef macro'
1844                    is defined (or had #pragma once) */
1845 #ifdef INC_DEBUG
1846                 printf("%s: skipping cached %s\n", file->filename, buf1);
1847 #endif
1848                 goto include_done;
1849             }
1850 
1851             if (tcc_open(s1, buf1) < 0)
1852                 continue;
1853 
1854             file->include_next_index = i + 1;
1855 #ifdef INC_DEBUG
1856             printf("%s: including %s\n", file->prev->filename, file->filename);
1857 #endif
1858             /* update target deps */
1859             if (s1->gen_deps) {
1860                 dynarray_add(&s1->target_deps, &s1->nb_target_deps,
1861                     tcc_strdup(buf1));
1862             }
1863             /* add include file debug info */
1864             tcc_debug_bincl(tcc_state);
1865             tok_flags |= TOK_FLAG_BOF | TOK_FLAG_BOL;
1866             ch = file->buf_ptr[0];
1867             goto the_end;
1868         }
1869         tcc_error("include file '%s' not found", buf);
1870 include_done:
1871         --s1->include_stack_ptr;
1872         break;
1873     case TOK_IFNDEF:
1874         c = 1;
1875         goto do_ifdef;
1876     case TOK_IF:
1877         c = expr_preprocess();
1878         goto do_if;
1879     case TOK_IFDEF:
1880         c = 0;
1881     do_ifdef:
1882         next_nomacro();
1883         if (tok < TOK_IDENT)
1884             tcc_error("invalid argument for '#if%sdef'", c ? "n" : "");
1885         if (is_bof) {
1886             if (c) {
1887 #ifdef INC_DEBUG
1888                 printf("#ifndef %s\n", get_tok_str(tok, NULL));
1889 #endif
1890                 file->ifndef_macro = tok;
1891             }
1892         }
1893         c = (define_find(tok) != 0) ^ c;
1894     do_if:
1895         if (s1->ifdef_stack_ptr >= s1->ifdef_stack + IFDEF_STACK_SIZE)
1896             tcc_error("memory full (ifdef)");
1897         *s1->ifdef_stack_ptr++ = c;
1898         goto test_skip;
1899     case TOK_ELSE:
1900         if (s1->ifdef_stack_ptr == s1->ifdef_stack)
1901             tcc_error("#else without matching #if");
1902         if (s1->ifdef_stack_ptr[-1] & 2)
1903             tcc_error("#else after #else");
1904         c = (s1->ifdef_stack_ptr[-1] ^= 3);
1905         goto test_else;
1906     case TOK_ELIF:
1907         if (s1->ifdef_stack_ptr == s1->ifdef_stack)
1908             tcc_error("#elif without matching #if");
1909         c = s1->ifdef_stack_ptr[-1];
1910         if (c > 1)
1911             tcc_error("#elif after #else");
1912         /* last #if/#elif expression was true: we skip */
1913         if (c == 1) {
1914             c = 0;
1915         } else {
1916             c = expr_preprocess();
1917             s1->ifdef_stack_ptr[-1] = c;
1918         }
1919     test_else:
1920         if (s1->ifdef_stack_ptr == file->ifdef_stack_ptr + 1)
1921             file->ifndef_macro = 0;
1922     test_skip:
1923         if (!(c & 1)) {
1924             preprocess_skip();
1925             is_bof = 0;
1926             goto redo;
1927         }
1928         break;
1929     case TOK_ENDIF:
1930         if (s1->ifdef_stack_ptr <= file->ifdef_stack_ptr)
1931             tcc_error("#endif without matching #if");
1932         s1->ifdef_stack_ptr--;
1933         /* '#ifndef macro' was at the start of file. Now we check if
1934            an '#endif' is exactly at the end of file */
1935         if (file->ifndef_macro &&
1936             s1->ifdef_stack_ptr == file->ifdef_stack_ptr) {
1937             file->ifndef_macro_saved = file->ifndef_macro;
1938             /* need to set to zero to avoid false matches if another
1939                #ifndef at middle of file */
1940             file->ifndef_macro = 0;
1941             while (tok != TOK_LINEFEED)
1942                 next_nomacro();
1943             tok_flags |= TOK_FLAG_ENDIF;
1944             goto the_end;
1945         }
1946         break;
1947     case TOK_PPNUM:
1948         n = strtoul((char*)tokc.str.data, &q, 10);
1949         goto _line_num;
1950     case TOK_LINE:
1951         next();
1952         if (tok != TOK_CINT)
1953     _line_err:
1954             tcc_error("wrong #line format");
1955         n = tokc.i;
1956     _line_num:
1957         next();
1958         if (tok != TOK_LINEFEED) {
1959             if (tok == TOK_STR) {
1960                 if (file->true_filename == file->filename)
1961                     file->true_filename = tcc_strdup(file->filename);
1962                 /* prepend directory from real file */
1963                 pstrcpy(buf, sizeof buf, file->true_filename);
1964                 *tcc_basename(buf) = 0;
1965                 pstrcat(buf, sizeof buf, (char *)tokc.str.data);
1966                 tcc_debug_putfile(s1, buf);
1967             } else if (parse_flags & PARSE_FLAG_ASM_FILE)
1968                 break;
1969             else
1970                 goto _line_err;
1971             --n;
1972         }
1973         if (file->fd > 0)
1974             total_lines += file->line_num - n;
1975         file->line_num = n;
1976         break;
1977     case TOK_ERROR:
1978     case TOK_WARNING:
1979         c = tok;
1980         ch = file->buf_ptr[0];
1981         skip_spaces();
1982         q = buf;
1983         while (ch != '\n' && ch != CH_EOF) {
1984             if ((q - buf) < sizeof(buf) - 1)
1985                 *q++ = ch;
1986             if (ch == '\\') {
1987                 if (handle_stray_noerror() == 0)
1988                     --q;
1989             } else
1990                 inp();
1991         }
1992         *q = '\0';
1993         if (c == TOK_ERROR)
1994             tcc_error("#error %s", buf);
1995         else
1996             tcc_warning("#warning %s", buf);
1997         break;
1998     case TOK_PRAGMA:
1999         pragma_parse(s1);
2000         break;
2001     case TOK_LINEFEED:
2002         goto the_end;
2003     default:
2004         /* ignore gas line comment in an 'S' file. */
2005         if (saved_parse_flags & PARSE_FLAG_ASM_FILE)
2006             goto ignore;
2007         if (tok == '!' && is_bof)
2008             /* '!' is ignored at beginning to allow C scripts. */
2009             goto ignore;
2010         tcc_warning("Ignoring unknown preprocessing directive #%s", get_tok_str(tok, &tokc));
2011     ignore:
2012         file->buf_ptr = parse_line_comment(file->buf_ptr - 1);
2013         goto the_end;
2014     }
2015     /* ignore other preprocess commands or #! for C scripts */
2016     while (tok != TOK_LINEFEED)
2017         next_nomacro();
2018  the_end:
2019     parse_flags = saved_parse_flags;
2020 }
2021 
2022 /* evaluate escape codes in a string. */
parse_escape_string(CString *outstr, const uint8_t *buf, int is_long)2023 static void parse_escape_string(CString *outstr, const uint8_t *buf, int is_long)
2024 {
2025     int c, n;
2026     const uint8_t *p;
2027 
2028     p = buf;
2029     for(;;) {
2030         c = *p;
2031         if (c == '\0')
2032             break;
2033         if (c == '\\') {
2034             p++;
2035             /* escape */
2036             c = *p;
2037             switch(c) {
2038             case '0': case '1': case '2': case '3':
2039             case '4': case '5': case '6': case '7':
2040                 /* at most three octal digits */
2041                 n = c - '0';
2042                 p++;
2043                 c = *p;
2044                 if (isoct(c)) {
2045                     n = n * 8 + c - '0';
2046                     p++;
2047                     c = *p;
2048                     if (isoct(c)) {
2049                         n = n * 8 + c - '0';
2050                         p++;
2051                     }
2052                 }
2053                 c = n;
2054                 goto add_char_nonext;
2055             case 'x':
2056             case 'u':
2057             case 'U':
2058                 p++;
2059                 n = 0;
2060                 for(;;) {
2061                     c = *p;
2062                     if (c >= 'a' && c <= 'f')
2063                         c = c - 'a' + 10;
2064                     else if (c >= 'A' && c <= 'F')
2065                         c = c - 'A' + 10;
2066                     else if (isnum(c))
2067                         c = c - '0';
2068                     else
2069                         break;
2070                     n = n * 16 + c;
2071                     p++;
2072                 }
2073                 c = n;
2074                 goto add_char_nonext;
2075             case 'a':
2076                 c = '\a';
2077                 break;
2078             case 'b':
2079                 c = '\b';
2080                 break;
2081             case 'f':
2082                 c = '\f';
2083                 break;
2084             case 'n':
2085                 c = '\n';
2086                 break;
2087             case 'r':
2088                 c = '\r';
2089                 break;
2090             case 't':
2091                 c = '\t';
2092                 break;
2093             case 'v':
2094                 c = '\v';
2095                 break;
2096             case 'e':
2097                 if (!gnu_ext)
2098                     goto invalid_escape;
2099                 c = 27;
2100                 break;
2101             case '\'':
2102             case '\"':
2103             case '\\':
2104             case '?':
2105                 break;
2106             default:
2107             invalid_escape:
2108                 if (c >= '!' && c <= '~')
2109                     tcc_warning("unknown escape sequence: \'\\%c\'", c);
2110                 else
2111                     tcc_warning("unknown escape sequence: \'\\x%x\'", c);
2112                 break;
2113             }
2114         } else if (is_long && c >= 0x80) {
2115             /* assume we are processing UTF-8 sequence */
2116             /* reference: The Unicode Standard, Version 10.0, ch3.9 */
2117 
2118             int cont; /* count of continuation bytes */
2119             int skip; /* how many bytes should skip when error occurred */
2120             int i;
2121 
2122             /* decode leading byte */
2123             if (c < 0xC2) {
2124 	            skip = 1; goto invalid_utf8_sequence;
2125             } else if (c <= 0xDF) {
2126 	            cont = 1; n = c & 0x1f;
2127             } else if (c <= 0xEF) {
2128 	            cont = 2; n = c & 0xf;
2129             } else if (c <= 0xF4) {
2130 	            cont = 3; n = c & 0x7;
2131             } else {
2132 	            skip = 1; goto invalid_utf8_sequence;
2133             }
2134 
2135             /* decode continuation bytes */
2136             for (i = 1; i <= cont; i++) {
2137                 int l = 0x80, h = 0xBF;
2138 
2139                 /* adjust limit for second byte */
2140                 if (i == 1) {
2141                     switch (c) {
2142                     case 0xE0: l = 0xA0; break;
2143                     case 0xED: h = 0x9F; break;
2144                     case 0xF0: l = 0x90; break;
2145                     case 0xF4: h = 0x8F; break;
2146                     }
2147                 }
2148 
2149                 if (p[i] < l || p[i] > h) {
2150                     skip = i; goto invalid_utf8_sequence;
2151                 }
2152 
2153                 n = (n << 6) | (p[i] & 0x3f);
2154             }
2155 
2156             /* advance pointer */
2157             p += 1 + cont;
2158             c = n;
2159             goto add_char_nonext;
2160 
2161             /* error handling */
2162         invalid_utf8_sequence:
2163             tcc_warning("ill-formed UTF-8 subsequence starting with: \'\\x%x\'", c);
2164             c = 0xFFFD;
2165             p += skip;
2166             goto add_char_nonext;
2167 
2168         }
2169         p++;
2170     add_char_nonext:
2171         if (!is_long)
2172             cstr_ccat(outstr, c);
2173         else {
2174 #ifdef TCC_TARGET_PE
2175             /* store as UTF-16 */
2176             if (c < 0x10000) {
2177                 cstr_wccat(outstr, c);
2178             } else {
2179                 c -= 0x10000;
2180                 cstr_wccat(outstr, (c >> 10) + 0xD800);
2181                 cstr_wccat(outstr, (c & 0x3FF) + 0xDC00);
2182             }
2183 #else
2184             cstr_wccat(outstr, c);
2185 #endif
2186         }
2187     }
2188     /* add a trailing '\0' */
2189     if (!is_long)
2190         cstr_ccat(outstr, '\0');
2191     else
2192         cstr_wccat(outstr, '\0');
2193 }
2194 
parse_string(const char *s, int len)2195 static void parse_string(const char *s, int len)
2196 {
2197     uint8_t buf[1000], *p = buf;
2198     int is_long, sep;
2199 
2200     if ((is_long = *s == 'L'))
2201         ++s, --len;
2202     sep = *s++;
2203     len -= 2;
2204     if (len >= sizeof buf)
2205         p = tcc_malloc(len + 1);
2206     memcpy(p, s, len);
2207     p[len] = 0;
2208 
2209     cstr_reset(&tokcstr);
2210     parse_escape_string(&tokcstr, p, is_long);
2211     if (p != buf)
2212         tcc_free(p);
2213 
2214     if (sep == '\'') {
2215         int char_size, i, n, c;
2216         /* XXX: make it portable */
2217         if (!is_long)
2218             tok = TOK_CCHAR, char_size = 1;
2219         else
2220             tok = TOK_LCHAR, char_size = sizeof(nwchar_t);
2221         n = tokcstr.size / char_size - 1;
2222         if (n < 1)
2223             tcc_error("empty character constant");
2224         if (n > 1)
2225             tcc_warning("multi-character character constant");
2226         for (c = i = 0; i < n; ++i) {
2227             if (is_long)
2228                 c = ((nwchar_t *)tokcstr.data)[i];
2229             else
2230                 c = (c << 8) | ((char *)tokcstr.data)[i];
2231         }
2232         tokc.i = c;
2233     } else {
2234         tokc.str.size = tokcstr.size;
2235         tokc.str.data = tokcstr.data;
2236         if (!is_long)
2237             tok = TOK_STR;
2238         else
2239             tok = TOK_LSTR;
2240     }
2241 }
2242 
/* we use 64 bit numbers */
#define BN_SIZE 2

/* bn = (bn << shift) | or_val

   'bn' is an array of BN_SIZE words, least significant word first, each
   one handled as a 32 bit quantity.  The bits shifted out of a word are
   carried into the next one, those shifted out of the last word are
   lost.  'or_val' is OR-ed into the lowest word after the shift.
   'shift' must be in the range 0..31. */
static void bn_lshift(unsigned int *bn, int shift, int or_val)
{
    int i;
    unsigned int v;
    for(i=0;i<BN_SIZE;i++) {
        v = bn[i];
        bn[i] = (v << shift) | or_val;
        /* nothing is carried when shift is 0; 'v >> 32' would be
           undefined behaviour */
        or_val = shift ? v >> (32 - shift) : 0;
    }
}
2257 
bn_zero(unsigned int *bn)2258 static void bn_zero(unsigned int *bn)
2259 {
2260     int i;
2261     for(i=0;i<BN_SIZE;i++) {
2262         bn[i] = 0;
2263     }
2264 }
2265 
2266 /* parse number in null terminated string 'p' and return it in the
2267    current token */
parse_number(const char *p)2268 static void parse_number(const char *p)
2269 {
2270     int b, t, shift, frac_bits, s, exp_val, ch;
2271     char *q;
2272     unsigned int bn[BN_SIZE];
2273     double d;
2274 
2275     /* number */
2276     q = token_buf;
2277     ch = *p++;
2278     t = ch;
2279     ch = *p++;
2280     *q++ = t;
2281     b = 10;
2282     if (t == '.') {
2283         goto float_frac_parse;
2284     } else if (t == '0') {
2285         if (ch == 'x' || ch == 'X') {
2286             q--;
2287             ch = *p++;
2288             b = 16;
2289         } else if (tcc_state->tcc_ext && (ch == 'b' || ch == 'B')) {
2290             q--;
2291             ch = *p++;
2292             b = 2;
2293         }
2294     }
2295     /* parse all digits. cannot check octal numbers at this stage
2296        because of floating point constants */
2297     while (1) {
2298         if (ch >= 'a' && ch <= 'f')
2299             t = ch - 'a' + 10;
2300         else if (ch >= 'A' && ch <= 'F')
2301             t = ch - 'A' + 10;
2302         else if (isnum(ch))
2303             t = ch - '0';
2304         else
2305             break;
2306         if (t >= b)
2307             break;
2308         if (q >= token_buf + STRING_MAX_SIZE) {
2309         num_too_long:
2310             tcc_error("number too long");
2311         }
2312         *q++ = ch;
2313         ch = *p++;
2314     }
2315     if (ch == '.' ||
2316         ((ch == 'e' || ch == 'E') && b == 10) ||
2317         ((ch == 'p' || ch == 'P') && (b == 16 || b == 2))) {
2318         if (b != 10) {
2319             /* NOTE: strtox should support that for hexa numbers, but
2320                non ISOC99 libcs do not support it, so we prefer to do
2321                it by hand */
2322             /* hexadecimal or binary floats */
2323             /* XXX: handle overflows */
2324             *q = '\0';
2325             if (b == 16)
2326                 shift = 4;
2327             else
2328                 shift = 1;
2329             bn_zero(bn);
2330             q = token_buf;
2331             while (1) {
2332                 t = *q++;
2333                 if (t == '\0') {
2334                     break;
2335                 } else if (t >= 'a') {
2336                     t = t - 'a' + 10;
2337                 } else if (t >= 'A') {
2338                     t = t - 'A' + 10;
2339                 } else {
2340                     t = t - '0';
2341                 }
2342                 bn_lshift(bn, shift, t);
2343             }
2344             frac_bits = 0;
2345             if (ch == '.') {
2346                 ch = *p++;
2347                 while (1) {
2348                     t = ch;
2349                     if (t >= 'a' && t <= 'f') {
2350                         t = t - 'a' + 10;
2351                     } else if (t >= 'A' && t <= 'F') {
2352                         t = t - 'A' + 10;
2353                     } else if (t >= '0' && t <= '9') {
2354                         t = t - '0';
2355                     } else {
2356                         break;
2357                     }
2358                     if (t >= b)
2359                         tcc_error("invalid digit");
2360                     bn_lshift(bn, shift, t);
2361                     frac_bits += shift;
2362                     ch = *p++;
2363                 }
2364             }
2365             if (ch != 'p' && ch != 'P')
2366                 expect("exponent");
2367             ch = *p++;
2368             s = 1;
2369             exp_val = 0;
2370             if (ch == '+') {
2371                 ch = *p++;
2372             } else if (ch == '-') {
2373                 s = -1;
2374                 ch = *p++;
2375             }
2376             if (ch < '0' || ch > '9')
2377                 expect("exponent digits");
2378             while (ch >= '0' && ch <= '9') {
2379                 exp_val = exp_val * 10 + ch - '0';
2380                 ch = *p++;
2381             }
2382             exp_val = exp_val * s;
2383 
2384             /* now we can generate the number */
2385             /* XXX: should patch directly float number */
2386             d = (double)bn[1] * 4294967296.0 + (double)bn[0];
2387             d = ldexp(d, exp_val - frac_bits);
2388             t = toup(ch);
2389             if (t == 'F') {
2390                 ch = *p++;
2391                 tok = TOK_CFLOAT;
2392                 /* float : should handle overflow */
2393                 tokc.f = (float)d;
2394             } else if (t == 'L') {
2395                 ch = *p++;
2396 #ifdef TCC_TARGET_PE
2397                 tok = TOK_CDOUBLE;
2398                 tokc.d = d;
2399 #else
2400                 tok = TOK_CLDOUBLE;
2401                 /* XXX: not large enough */
2402                 tokc.ld = (long double)d;
2403 #endif
2404             } else {
2405                 tok = TOK_CDOUBLE;
2406                 tokc.d = d;
2407             }
2408         } else {
2409             /* decimal floats */
2410             if (ch == '.') {
2411                 if (q >= token_buf + STRING_MAX_SIZE)
2412                     goto num_too_long;
2413                 *q++ = ch;
2414                 ch = *p++;
2415             float_frac_parse:
2416                 while (ch >= '0' && ch <= '9') {
2417                     if (q >= token_buf + STRING_MAX_SIZE)
2418                         goto num_too_long;
2419                     *q++ = ch;
2420                     ch = *p++;
2421                 }
2422             }
2423             if (ch == 'e' || ch == 'E') {
2424                 if (q >= token_buf + STRING_MAX_SIZE)
2425                     goto num_too_long;
2426                 *q++ = ch;
2427                 ch = *p++;
2428                 if (ch == '-' || ch == '+') {
2429                     if (q >= token_buf + STRING_MAX_SIZE)
2430                         goto num_too_long;
2431                     *q++ = ch;
2432                     ch = *p++;
2433                 }
2434                 if (ch < '0' || ch > '9')
2435                     expect("exponent digits");
2436                 while (ch >= '0' && ch <= '9') {
2437                     if (q >= token_buf + STRING_MAX_SIZE)
2438                         goto num_too_long;
2439                     *q++ = ch;
2440                     ch = *p++;
2441                 }
2442             }
2443             *q = '\0';
2444             t = toup(ch);
2445             errno = 0;
2446             if (t == 'F') {
2447                 ch = *p++;
2448                 tok = TOK_CFLOAT;
2449                 tokc.f = strtof(token_buf, NULL);
2450             } else if (t == 'L') {
2451                 ch = *p++;
2452 #ifdef TCC_TARGET_PE
2453                 tok = TOK_CDOUBLE;
2454                 tokc.d = strtod(token_buf, NULL);
2455 #else
2456                 tok = TOK_CLDOUBLE;
2457                 tokc.ld = strtold(token_buf, NULL);
2458 #endif
2459             } else {
2460                 tok = TOK_CDOUBLE;
2461                 tokc.d = strtod(token_buf, NULL);
2462             }
2463         }
2464     } else {
2465         unsigned long long n, n1;
2466         int lcount, ucount, ov = 0;
2467         const char *p1;
2468 
2469         /* integer number */
2470         *q = '\0';
2471         q = token_buf;
2472         if (b == 10 && *q == '0') {
2473             b = 8;
2474             q++;
2475         }
2476         n = 0;
2477         while(1) {
2478             t = *q++;
2479             /* no need for checks except for base 10 / 8 errors */
2480             if (t == '\0')
2481                 break;
2482             else if (t >= 'a')
2483                 t = t - 'a' + 10;
2484             else if (t >= 'A')
2485                 t = t - 'A' + 10;
2486             else
2487                 t = t - '0';
2488             if (t >= b)
2489                 tcc_error("invalid digit");
2490             n1 = n;
2491             n = n * b + t;
2492             /* detect overflow */
2493             if (n1 >= 0x1000000000000000ULL && n / b != n1)
2494                 ov = 1;
2495         }
2496 
2497         /* Determine the characteristics (unsigned and/or 64bit) the type of
2498            the constant must have according to the constant suffix(es) */
2499         lcount = ucount = 0;
2500         p1 = p;
2501         for(;;) {
2502             t = toup(ch);
2503             if (t == 'L') {
2504                 if (lcount >= 2)
2505                     tcc_error("three 'l's in integer constant");
2506                 if (lcount && *(p - 1) != ch)
2507                     tcc_error("incorrect integer suffix: %s", p1);
2508                 lcount++;
2509                 ch = *p++;
2510             } else if (t == 'U') {
2511                 if (ucount >= 1)
2512                     tcc_error("two 'u's in integer constant");
2513                 ucount++;
2514                 ch = *p++;
2515             } else {
2516                 break;
2517             }
2518         }
2519 
2520         /* Determine if it needs 64 bits and/or unsigned in order to fit */
2521         if (ucount == 0 && b == 10) {
2522             if (lcount <= (LONG_SIZE == 4)) {
2523                 if (n >= 0x80000000U)
2524                     lcount = (LONG_SIZE == 4) + 1;
2525             }
2526             if (n >= 0x8000000000000000ULL)
2527                 ov = 1, ucount = 1;
2528         } else {
2529             if (lcount <= (LONG_SIZE == 4)) {
2530                 if (n >= 0x100000000ULL)
2531                     lcount = (LONG_SIZE == 4) + 1;
2532                 else if (n >= 0x80000000U)
2533                     ucount = 1;
2534             }
2535             if (n >= 0x8000000000000000ULL)
2536                 ucount = 1;
2537         }
2538 
2539         if (ov)
2540             tcc_warning("integer constant overflow");
2541 
2542         tok = TOK_CINT;
2543 	if (lcount) {
2544             tok = TOK_CLONG;
2545             if (lcount == 2)
2546                 tok = TOK_CLLONG;
2547 	}
2548 	if (ucount)
2549 	    ++tok; /* TOK_CU... */
2550         tokc.i = n;
2551     }
2552     if (ch)
2553         tcc_error("invalid number\n");
2554 }
2555 
2556 
/* Expand to the complete switch case for the character 'c1': after
   PEEKC(c, p), the token is 'tok2' when 'c' equals 'c2' (and 'p' is moved
   past it), otherwise it is 'tok1'.  The expansion uses the names 'c',
   'p' and 'tok' of the place where the macro is used. */
#define PARSE2(c1, tok1, c2, tok2)              \
    case c1:                                    \
        PEEKC(c, p);                            \
        if (c != c2) {                          \
            tok = tok1;                         \
        } else {                                \
            tok = tok2;                         \
            p++;                                \
        }                                       \
        break;
2567 
2568 /* return next token without macro substitution */
next_nomacro1(void)2569 static inline void next_nomacro1(void)
2570 {
2571     int t, c, is_long, len;
2572     TokenSym *ts;
2573     uint8_t *p, *p1;
2574     unsigned int h;
2575 
2576     p = file->buf_ptr;
2577  redo_no_start:
2578     c = *p;
2579     switch(c) {
2580     case ' ':
2581     case '\t':
2582         tok = c;
2583         p++;
2584         if (parse_flags & PARSE_FLAG_SPACES)
2585             goto keep_tok_flags;
2586         while (isidnum_table[*p - CH_EOF] & IS_SPC)
2587             ++p;
2588         goto redo_no_start;
2589     case '\f':
2590     case '\v':
2591     case '\r':
2592         p++;
2593         goto redo_no_start;
2594     case '\\':
2595         /* first look if it is in fact an end of buffer */
2596         c = handle_stray1(p);
2597         p = file->buf_ptr;
2598         if (c == '\\')
2599             goto parse_simple;
2600         if (c != CH_EOF)
2601             goto redo_no_start;
2602         {
2603             TCCState *s1 = tcc_state;
2604             if ((parse_flags & PARSE_FLAG_LINEFEED)
2605                 && !(tok_flags & TOK_FLAG_EOF)) {
2606                 tok_flags |= TOK_FLAG_EOF;
2607                 tok = TOK_LINEFEED;
2608                 goto keep_tok_flags;
2609             } else if (!(parse_flags & PARSE_FLAG_PREPROCESS)) {
2610                 tok = TOK_EOF;
2611             } else if (s1->ifdef_stack_ptr != file->ifdef_stack_ptr) {
2612                 tcc_error("missing #endif");
2613             } else if (s1->include_stack_ptr == s1->include_stack) {
2614                 /* no include left : end of file. */
2615                 tok = TOK_EOF;
2616             } else {
2617                 tok_flags &= ~TOK_FLAG_EOF;
2618                 /* pop include file */
2619 
2620                 /* test if previous '#endif' was after a #ifdef at
2621                    start of file */
2622                 if (tok_flags & TOK_FLAG_ENDIF) {
2623 #ifdef INC_DEBUG
2624                     printf("#endif %s\n", get_tok_str(file->ifndef_macro_saved, NULL));
2625 #endif
2626                     search_cached_include(s1, file->filename, 1)
2627                         ->ifndef_macro = file->ifndef_macro_saved;
2628                     tok_flags &= ~TOK_FLAG_ENDIF;
2629                 }
2630 
2631                 /* add end of include file debug info */
2632                 tcc_debug_eincl(tcc_state);
2633                 /* pop include stack */
2634                 tcc_close();
2635                 s1->include_stack_ptr--;
2636                 p = file->buf_ptr;
2637                 if (p == file->buffer)
2638                     tok_flags = TOK_FLAG_BOF|TOK_FLAG_BOL;
2639                 goto redo_no_start;
2640             }
2641         }
2642         break;
2643 
2644     case '\n':
2645         file->line_num++;
2646         tok_flags |= TOK_FLAG_BOL;
2647         p++;
2648 maybe_newline:
2649         if (0 == (parse_flags & PARSE_FLAG_LINEFEED))
2650             goto redo_no_start;
2651         tok = TOK_LINEFEED;
2652         goto keep_tok_flags;
2653 
2654     case '#':
2655         /* XXX: simplify */
2656         PEEKC(c, p);
2657         if ((tok_flags & TOK_FLAG_BOL) &&
2658             (parse_flags & PARSE_FLAG_PREPROCESS)) {
2659             file->buf_ptr = p;
2660             preprocess(tok_flags & TOK_FLAG_BOF);
2661             p = file->buf_ptr;
2662             goto maybe_newline;
2663         } else {
2664             if (c == '#') {
2665                 p++;
2666                 tok = TOK_TWOSHARPS;
2667             } else {
2668                 if (parse_flags & PARSE_FLAG_ASM_FILE) {
2669                     p = parse_line_comment(p - 1);
2670                     goto redo_no_start;
2671                 } else {
2672                     tok = '#';
2673                 }
2674             }
2675         }
2676         break;
2677 
2678     /* dollar is allowed to start identifiers when not parsing asm */
2679     case '$':
2680         if (!(isidnum_table[c - CH_EOF] & IS_ID)
2681          || (parse_flags & PARSE_FLAG_ASM_FILE))
2682             goto parse_simple;
2683 
2684     case 'a': case 'b': case 'c': case 'd':
2685     case 'e': case 'f': case 'g': case 'h':
2686     case 'i': case 'j': case 'k': case 'l':
2687     case 'm': case 'n': case 'o': case 'p':
2688     case 'q': case 'r': case 's': case 't':
2689     case 'u': case 'v': case 'w': case 'x':
2690     case 'y': case 'z':
2691     case 'A': case 'B': case 'C': case 'D':
2692     case 'E': case 'F': case 'G': case 'H':
2693     case 'I': case 'J': case 'K':
2694     case 'M': case 'N': case 'O': case 'P':
2695     case 'Q': case 'R': case 'S': case 'T':
2696     case 'U': case 'V': case 'W': case 'X':
2697     case 'Y': case 'Z':
2698     case '_':
2699     parse_ident_fast:
2700         p1 = p;
2701         h = TOK_HASH_INIT;
2702         h = TOK_HASH_FUNC(h, c);
2703         while (c = *++p, isidnum_table[c - CH_EOF] & (IS_ID|IS_NUM))
2704             h = TOK_HASH_FUNC(h, c);
2705         len = p - p1;
2706         if (c != '\\') {
2707             TokenSym **pts;
2708 
2709             /* fast case : no stray found, so we have the full token
2710                and we have already hashed it */
2711             h &= (TOK_HASH_SIZE - 1);
2712             pts = &hash_ident[h];
2713             for(;;) {
2714                 ts = *pts;
2715                 if (!ts)
2716                     break;
2717                 if (ts->len == len && !memcmp(ts->str, p1, len))
2718                     goto token_found;
2719                 pts = &(ts->hash_next);
2720             }
2721             ts = tok_alloc_new(pts, (char *) p1, len);
2722         token_found: ;
2723         } else {
2724             /* slower case */
2725             cstr_reset(&tokcstr);
2726             cstr_cat(&tokcstr, (char *) p1, len);
2727             p--;
2728             PEEKC(c, p);
2729         parse_ident_slow:
2730             while (isidnum_table[c - CH_EOF] & (IS_ID|IS_NUM))
2731             {
2732                 cstr_ccat(&tokcstr, c);
2733                 PEEKC(c, p);
2734             }
2735             ts = tok_alloc(tokcstr.data, tokcstr.size);
2736         }
2737         tok = ts->tok;
2738         break;
2739     case 'L':
2740         t = p[1];
2741         if (t != '\\' && t != '\'' && t != '\"') {
2742             /* fast case */
2743             goto parse_ident_fast;
2744         } else {
2745             PEEKC(c, p);
2746             if (c == '\'' || c == '\"') {
2747                 is_long = 1;
2748                 goto str_const;
2749             } else {
2750                 cstr_reset(&tokcstr);
2751                 cstr_ccat(&tokcstr, 'L');
2752                 goto parse_ident_slow;
2753             }
2754         }
2755         break;
2756 
2757     case '0': case '1': case '2': case '3':
2758     case '4': case '5': case '6': case '7':
2759     case '8': case '9':
2760         t = c;
2761         PEEKC(c, p);
2762         /* after the first digit, accept digits, alpha, '.' or sign if
2763            prefixed by 'eEpP' */
2764     parse_num:
2765         cstr_reset(&tokcstr);
2766         for(;;) {
2767             cstr_ccat(&tokcstr, t);
2768             if (!((isidnum_table[c - CH_EOF] & (IS_ID|IS_NUM))
2769                   || c == '.'
2770                   || ((c == '+' || c == '-')
2771                       && (((t == 'e' || t == 'E')
2772                             && !(parse_flags & PARSE_FLAG_ASM_FILE
2773                                 /* 0xe+1 is 3 tokens in asm */
2774                                 && ((char*)tokcstr.data)[0] == '0'
2775                                 && toup(((char*)tokcstr.data)[1]) == 'X'))
2776                           || t == 'p' || t == 'P'))))
2777                 break;
2778             t = c;
2779             PEEKC(c, p);
2780         }
2781         /* We add a trailing '\0' to ease parsing */
2782         cstr_ccat(&tokcstr, '\0');
2783         tokc.str.size = tokcstr.size;
2784         tokc.str.data = tokcstr.data;
2785         tok = TOK_PPNUM;
2786         break;
2787 
2788     case '.':
2789         /* special dot handling because it can also start a number */
2790         PEEKC(c, p);
2791         if (isnum(c)) {
2792             t = '.';
2793             goto parse_num;
2794         } else if ((isidnum_table['.' - CH_EOF] & IS_ID)
2795                    && (isidnum_table[c - CH_EOF] & (IS_ID|IS_NUM))) {
2796             *--p = c = '.';
2797             goto parse_ident_fast;
2798         } else if (c == '.') {
2799             PEEKC(c, p);
2800             if (c == '.') {
2801                 p++;
2802                 tok = TOK_DOTS;
2803             } else {
2804                 *--p = '.'; /* may underflow into file->unget[] */
2805                 tok = '.';
2806             }
2807         } else {
2808             tok = '.';
2809         }
2810         break;
2811     case '\'':
2812     case '\"':
2813         is_long = 0;
2814     str_const:
2815         cstr_reset(&tokcstr);
2816         if (is_long)
2817             cstr_ccat(&tokcstr, 'L');
2818         cstr_ccat(&tokcstr, c);
2819         p = parse_pp_string(p, c, &tokcstr);
2820         cstr_ccat(&tokcstr, c);
2821         cstr_ccat(&tokcstr, '\0');
2822         tokc.str.size = tokcstr.size;
2823         tokc.str.data = tokcstr.data;
2824         tok = TOK_PPSTR;
2825         break;
2826 
2827     case '<':
2828         PEEKC(c, p);
2829         if (c == '=') {
2830             p++;
2831             tok = TOK_LE;
2832         } else if (c == '<') {
2833             PEEKC(c, p);
2834             if (c == '=') {
2835                 p++;
2836                 tok = TOK_A_SHL;
2837             } else {
2838                 tok = TOK_SHL;
2839             }
2840         } else {
2841             tok = TOK_LT;
2842         }
2843         break;
2844     case '>':
2845         PEEKC(c, p);
2846         if (c == '=') {
2847             p++;
2848             tok = TOK_GE;
2849         } else if (c == '>') {
2850             PEEKC(c, p);
2851             if (c == '=') {
2852                 p++;
2853                 tok = TOK_A_SAR;
2854             } else {
2855                 tok = TOK_SAR;
2856             }
2857         } else {
2858             tok = TOK_GT;
2859         }
2860         break;
2861 
2862     case '&':
2863         PEEKC(c, p);
2864         if (c == '&') {
2865             p++;
2866             tok = TOK_LAND;
2867         } else if (c == '=') {
2868             p++;
2869             tok = TOK_A_AND;
2870         } else {
2871             tok = '&';
2872         }
2873         break;
2874 
2875     case '|':
2876         PEEKC(c, p);
2877         if (c == '|') {
2878             p++;
2879             tok = TOK_LOR;
2880         } else if (c == '=') {
2881             p++;
2882             tok = TOK_A_OR;
2883         } else {
2884             tok = '|';
2885         }
2886         break;
2887 
2888     case '+':
2889         PEEKC(c, p);
2890         if (c == '+') {
2891             p++;
2892             tok = TOK_INC;
2893         } else if (c == '=') {
2894             p++;
2895             tok = TOK_A_ADD;
2896         } else {
2897             tok = '+';
2898         }
2899         break;
2900 
2901     case '-':
2902         PEEKC(c, p);
2903         if (c == '-') {
2904             p++;
2905             tok = TOK_DEC;
2906         } else if (c == '=') {
2907             p++;
2908             tok = TOK_A_SUB;
2909         } else if (c == '>') {
2910             p++;
2911             tok = TOK_ARROW;
2912         } else {
2913             tok = '-';
2914         }
2915         break;
2916 
2917     PARSE2('!', '!', '=', TOK_NE)
2918     PARSE2('=', '=', '=', TOK_EQ)
2919     PARSE2('*', '*', '=', TOK_A_MUL)
2920     PARSE2('%', '%', '=', TOK_A_MOD)
2921     PARSE2('^', '^', '=', TOK_A_XOR)
2922 
2923         /* comments or operator */
2924     case '/':
2925         PEEKC(c, p);
2926         if (c == '*') {
2927             p = parse_comment(p);
2928             /* comments replaced by a blank */
2929             tok = ' ';
2930             goto keep_tok_flags;
2931         } else if (c == '/') {
2932             p = parse_line_comment(p);
2933             tok = ' ';
2934             goto keep_tok_flags;
2935         } else if (c == '=') {
2936             p++;
2937             tok = TOK_A_DIV;
2938         } else {
2939             tok = '/';
2940         }
2941         break;
2942 
2943         /* simple tokens */
2944     case '(':
2945     case ')':
2946     case '[':
2947     case ']':
2948     case '{':
2949     case '}':
2950     case ',':
2951     case ';':
2952     case ':':
2953     case '?':
2954     case '~':
2955     case '@': /* only used in assembler */
2956     parse_simple:
2957         tok = c;
2958         p++;
2959         break;
2960     default:
2961         if (c >= 0x80 && c <= 0xFF) /* utf8 identifiers */
2962 	    goto parse_ident_fast;
2963         if (parse_flags & PARSE_FLAG_ASM_FILE)
2964             goto parse_simple;
2965         tcc_error("unrecognized character \\x%02x", c);
2966         break;
2967     }
2968     tok_flags = 0;
2969 keep_tok_flags:
2970     file->buf_ptr = p;
2971 #if defined(PARSE_DEBUG)
2972     printf("token = %d %s\n", tok, get_tok_str(tok, &tokc));
2973 #endif
2974 }
2975 
2976 /* return next token without macro substitution. Can read input from
2977    macro_ptr buffer */
next_nomacro_spc(void)2978 static void next_nomacro_spc(void)
2979 {
2980     if (macro_ptr) {
2981     redo:
2982         tok = *macro_ptr;
2983         if (tok) {
2984             TOK_GET(&tok, &macro_ptr, &tokc);
2985             if (tok == TOK_LINENUM) {
2986                 file->line_num = tokc.i;
2987                 goto redo;
2988             }
2989         }
2990     } else {
2991         next_nomacro1();
2992     }
2993     //printf("token = %s\n", get_tok_str(tok, &tokc));
2994 }
2995 
next_nomacro(void)2996 ST_FUNC void next_nomacro(void)
2997 {
2998     do {
2999         next_nomacro_spc();
3000     } while (tok < 256 && (isidnum_table[tok - CH_EOF] & IS_SPC));
3001 }
3002 
3003 
3004 static void macro_subst(
3005     TokenString *tok_str,
3006     Sym **nested_list,
3007     const int *macro_str
3008     );
3009 
3010 /* substitute arguments in replacement lists in macro_str by the values in
3011    args (field d) and return allocated string */
macro_arg_subst(Sym **nested_list, const int *macro_str, Sym *args)3012 static int *macro_arg_subst(Sym **nested_list, const int *macro_str, Sym *args)
3013 {
3014     int t, t0, t1, spc;
3015     const int *st;
3016     Sym *s;
3017     CValue cval;
3018     TokenString str;
3019     CString cstr;
3020 
3021     tok_str_new(&str);
3022     t0 = t1 = 0;
3023     while(1) {
3024         TOK_GET(&t, &macro_str, &cval);
3025         if (!t)
3026             break;
3027         if (t == '#') {
3028             /* stringize */
3029             TOK_GET(&t, &macro_str, &cval);
3030             if (!t)
3031                 goto bad_stringy;
3032             s = sym_find2(args, t);
3033             if (s) {
3034                 cstr_new(&cstr);
3035                 cstr_ccat(&cstr, '\"');
3036                 st = s->d;
3037                 spc = 0;
3038                 while (*st >= 0) {
3039                     TOK_GET(&t, &st, &cval);
3040                     if (t != TOK_PLCHLDR
3041                      && t != TOK_NOSUBST
3042                      && 0 == check_space(t, &spc)) {
3043                         const char *s = get_tok_str(t, &cval);
3044                         while (*s) {
3045                             if (t == TOK_PPSTR && *s != '\'')
3046                                 add_char(&cstr, *s);
3047                             else
3048                                 cstr_ccat(&cstr, *s);
3049                             ++s;
3050                         }
3051                     }
3052                 }
3053                 cstr.size -= spc;
3054                 cstr_ccat(&cstr, '\"');
3055                 cstr_ccat(&cstr, '\0');
3056 #ifdef PP_DEBUG
3057                 printf("\nstringize: <%s>\n", (char *)cstr.data);
3058 #endif
3059                 /* add string */
3060                 cval.str.size = cstr.size;
3061                 cval.str.data = cstr.data;
3062                 tok_str_add2(&str, TOK_PPSTR, &cval);
3063                 cstr_free(&cstr);
3064             } else {
3065         bad_stringy:
3066                 expect("macro parameter after '#'");
3067             }
3068         } else if (t >= TOK_IDENT) {
3069             s = sym_find2(args, t);
3070             if (s) {
3071                 int l0 = str.len;
3072                 st = s->d;
3073                 /* if '##' is present before or after, no arg substitution */
3074                 if (*macro_str == TOK_PPJOIN || t1 == TOK_PPJOIN) {
3075                     /* special case for var arg macros : ## eats the ','
3076                        if empty VA_ARGS variable. */
3077                     if (t1 == TOK_PPJOIN && t0 == ',' && gnu_ext && s->type.t) {
3078                         if (*st <= 0) {
3079                             /* suppress ',' '##' */
3080                             str.len -= 2;
3081                         } else {
3082                             /* suppress '##' and add variable */
3083                             str.len--;
3084                             goto add_var;
3085                         }
3086                     }
3087                 } else {
3088             add_var:
3089 		    if (!s->next) {
3090 			/* Expand arguments tokens and store them.  In most
3091 			   cases we could also re-expand each argument if
3092 			   used multiple times, but not if the argument
3093 			   contains the __COUNTER__ macro.  */
3094 			TokenString str2;
3095 			sym_push2(&s->next, s->v, s->type.t, 0);
3096 			tok_str_new(&str2);
3097 			macro_subst(&str2, nested_list, st);
3098 			tok_str_add(&str2, 0);
3099 			s->next->d = str2.str;
3100 		    }
3101 		    st = s->next->d;
3102                 }
3103                 for(;;) {
3104                     int t2;
3105                     TOK_GET(&t2, &st, &cval);
3106                     if (t2 <= 0)
3107                         break;
3108                     tok_str_add2(&str, t2, &cval);
3109                 }
3110                 if (str.len == l0) /* expanded to empty string */
3111                     tok_str_add(&str, TOK_PLCHLDR);
3112             } else {
3113                 tok_str_add(&str, t);
3114             }
3115         } else {
3116             tok_str_add2(&str, t, &cval);
3117         }
3118         t0 = t1, t1 = t;
3119     }
3120     tok_str_add(&str, 0);
3121     return str.str;
3122 }
3123 
/* abbreviated month names, indexed by tm_mon, used to build __DATE__ */
static const char ab_month_name[12][4] = {
    "Jan", "Feb", "Mar", "Apr",
    "May", "Jun", "Jul", "Aug",
    "Sep", "Oct", "Nov", "Dec"
};
3129 
/* Concatenate the spellings of the tokens (t1,v1) and (t2,v2) and
   re-tokenize the result through a temporary ":paste:" buffer.
   TOK_PLCHLDR operands contribute nothing.  Returns 1 if the text was
   read up to its end, otherwise warns and returns 0.  The token read
   last is left in the globals tok/tokc. */
static int paste_tokens(int t1, CValue *v1, int t2, CValue *v2)
{
    CString joined;
    int left_len;
    int ok = 1;

    cstr_new(&joined);
    if (t1 != TOK_PLCHLDR)
        cstr_cat(&joined, get_tok_str(t1, v1), -1);
    left_len = joined.size; /* where the second spelling starts */
    if (t2 != TOK_PLCHLDR)
        cstr_cat(&joined, get_tok_str(t2, v2), -1);
    cstr_ccat(&joined, '\0');

    tcc_open_bf(tcc_state, ":paste:", joined.size);
    memcpy(file->buffer, joined.data, joined.size);
    tok_flags = 0;
    for (;;) {
        next_nomacro1();
        if (*file->buf_ptr == 0)
            break; /* whole text consumed */
        if (!is_space(tok)) {
            /* a token ended before the end of the text */
            tcc_warning("pasting \"%.*s\" and \"%s\" does not give a valid"
                " preprocessing token", left_len, (char *)joined.data,
                (char*)joined.data + left_len);
            ok = 0;
            break;
        }
    }
    tcc_close();
    cstr_free(&joined);
    return ok;
}
3162 
3163 /* handle the '##' operator. Return NULL if no '##' seen. Otherwise
3164    return the resulting string (which must be freed). */
macro_twosharps(const int *ptr0)3165 static inline int *macro_twosharps(const int *ptr0)
3166 {
3167     int t;
3168     CValue cval;
3169     TokenString macro_str1;
3170     int start_of_nosubsts = -1;
3171     const int *ptr;
3172 
3173     /* we search the first '##' */
3174     for (ptr = ptr0;;) {
3175         TOK_GET(&t, &ptr, &cval);
3176         if (t == TOK_PPJOIN)
3177             break;
3178         if (t == 0)
3179             return NULL;
3180     }
3181 
3182     tok_str_new(&macro_str1);
3183 
3184     //tok_print(" $$$", ptr0);
3185     for (ptr = ptr0;;) {
3186         TOK_GET(&t, &ptr, &cval);
3187         if (t == 0)
3188             break;
3189         if (t == TOK_PPJOIN)
3190             continue;
3191         while (*ptr == TOK_PPJOIN) {
3192             int t1; CValue cv1;
3193             /* given 'a##b', remove nosubsts preceding 'a' */
3194             if (start_of_nosubsts >= 0)
3195                 macro_str1.len = start_of_nosubsts;
3196             /* given 'a##b', remove nosubsts preceding 'b' */
3197             while ((t1 = *++ptr) == TOK_NOSUBST)
3198                 ;
3199             if (t1 && t1 != TOK_PPJOIN) {
3200                 TOK_GET(&t1, &ptr, &cv1);
3201                 if (t != TOK_PLCHLDR || t1 != TOK_PLCHLDR) {
3202                     if (paste_tokens(t, &cval, t1, &cv1)) {
3203                         t = tok, cval = tokc;
3204                     } else {
3205                         tok_str_add2(&macro_str1, t, &cval);
3206                         t = t1, cval = cv1;
3207                     }
3208                 }
3209             }
3210         }
3211         if (t == TOK_NOSUBST) {
3212             if (start_of_nosubsts < 0)
3213                 start_of_nosubsts = macro_str1.len;
3214         } else {
3215             start_of_nosubsts = -1;
3216         }
3217         tok_str_add2(&macro_str1, t, &cval);
3218     }
3219     tok_str_add(&macro_str1, 0);
3220     //tok_print(" ###", macro_str1.str);
3221     return macro_str1.str;
3222 }
3223 
3224 /* peek or read [ws_str == NULL] next token from function macro call,
3225    walking up macro levels up to the file if necessary */
next_argstream(Sym **nested_list, TokenString *ws_str)3226 static int next_argstream(Sym **nested_list, TokenString *ws_str)
3227 {
3228     int t;
3229     const int *p;
3230     Sym *sa;
3231 
3232     for (;;) {
3233         if (macro_ptr) {
3234             p = macro_ptr, t = *p;
3235             if (ws_str) {
3236                 while (is_space(t) || TOK_LINEFEED == t || TOK_PLCHLDR == t)
3237                     tok_str_add(ws_str, t), t = *++p;
3238             }
3239             if (t == 0) {
3240                 end_macro();
3241                 /* also, end of scope for nested defined symbol */
3242                 sa = *nested_list;
3243                 while (sa && sa->v == 0)
3244                     sa = sa->prev;
3245                 if (sa)
3246                     sa->v = 0;
3247                 continue;
3248             }
3249         } else {
3250             ch = handle_eob();
3251             if (ws_str) {
3252                 while (is_space(ch) || ch == '\n' || ch == '/') {
3253                     if (ch == '/') {
3254                         int c;
3255                         uint8_t *p = file->buf_ptr;
3256                         PEEKC(c, p);
3257                         if (c == '*') {
3258                             p = parse_comment(p);
3259                             file->buf_ptr = p - 1;
3260                         } else if (c == '/') {
3261                             p = parse_line_comment(p);
3262                             file->buf_ptr = p - 1;
3263                         } else
3264                             break;
3265                         ch = ' ';
3266                     }
3267                     if (ch == '\n')
3268                         file->line_num++;
3269                     if (!(ch == '\f' || ch == '\v' || ch == '\r'))
3270                         tok_str_add(ws_str, ch);
3271                     cinp();
3272                 }
3273             }
3274             t = ch;
3275         }
3276 
3277         if (ws_str)
3278             return t;
3279         next_nomacro_spc();
3280         return tok;
3281     }
3282 }
3283 
3284 /* do macro substitution of current token with macro 's' and add
3285    result to (tok_str,tok_len). 'nested_list' is the list of all
3286    macros we got inside to avoid recursing. Return non zero if no
3287    substitution needs to be done */
macro_subst_tok( TokenString *tok_str, Sym **nested_list, Sym *s)3288 static int macro_subst_tok(
3289     TokenString *tok_str,
3290     Sym **nested_list,
3291     Sym *s)
3292 {
3293     Sym *args, *sa, *sa1;
3294     int parlevel, t, t1, spc;
3295     TokenString str;
3296     char *cstrval;
3297     CValue cval;
3298     CString cstr;
3299     char buf[32];
3300 
3301     /* if symbol is a macro, prepare substitution */
3302     /* special macros */
3303     if (tok == TOK___LINE__ || tok == TOK___COUNTER__) {
3304         t = tok == TOK___LINE__ ? file->line_num : pp_counter++;
3305         snprintf(buf, sizeof(buf), "%d", t);
3306         cstrval = buf;
3307         t1 = TOK_PPNUM;
3308         goto add_cstr1;
3309     } else if (tok == TOK___FILE__) {
3310         cstrval = file->filename;
3311         goto add_cstr;
3312     } else if (tok == TOK___DATE__ || tok == TOK___TIME__) {
3313         time_t ti;
3314         struct tm *tm;
3315 
3316         time(&ti);
3317         tm = localtime(&ti);
3318         if (tok == TOK___DATE__) {
3319             snprintf(buf, sizeof(buf), "%s %2d %d",
3320                      ab_month_name[tm->tm_mon], tm->tm_mday, tm->tm_year + 1900);
3321         } else {
3322             snprintf(buf, sizeof(buf), "%02d:%02d:%02d",
3323                      tm->tm_hour, tm->tm_min, tm->tm_sec);
3324         }
3325         cstrval = buf;
3326     add_cstr:
3327         t1 = TOK_STR;
3328     add_cstr1:
3329         cstr_new(&cstr);
3330         cstr_cat(&cstr, cstrval, 0);
3331         cval.str.size = cstr.size;
3332         cval.str.data = cstr.data;
3333         tok_str_add2(tok_str, t1, &cval);
3334         cstr_free(&cstr);
3335     } else if (s->d) {
3336         int saved_parse_flags = parse_flags;
3337 	int *joined_str = NULL;
3338         int *mstr = s->d;
3339 
3340         if (s->type.t == MACRO_FUNC) {
3341             /* whitespace between macro name and argument list */
3342             TokenString ws_str;
3343             tok_str_new(&ws_str);
3344 
3345             spc = 0;
3346             parse_flags |= PARSE_FLAG_SPACES | PARSE_FLAG_LINEFEED
3347                 | PARSE_FLAG_ACCEPT_STRAYS;
3348 
3349             /* get next token from argument stream */
3350             t = next_argstream(nested_list, &ws_str);
3351             if (t != '(') {
3352                 /* not a macro substitution after all, restore the
3353                  * macro token plus all whitespace we've read.
3354                  * whitespace is intentionally not merged to preserve
3355                  * newlines. */
3356                 parse_flags = saved_parse_flags;
3357                 tok_str_add(tok_str, tok);
3358                 if (parse_flags & PARSE_FLAG_SPACES) {
3359                     int i;
3360                     for (i = 0; i < ws_str.len; i++)
3361                         tok_str_add(tok_str, ws_str.str[i]);
3362                 }
3363                 tok_str_free_str(ws_str.str);
3364                 return 0;
3365             } else {
3366                 tok_str_free_str(ws_str.str);
3367             }
3368 	    do {
3369 		next_nomacro(); /* eat '(' */
3370 	    } while (tok == TOK_PLCHLDR);
3371 
3372             /* argument macro */
3373             args = NULL;
3374             sa = s->next;
3375             /* NOTE: empty args are allowed, except if no args */
3376             for(;;) {
3377                 do {
3378                     next_argstream(nested_list, NULL);
3379                 } while (is_space(tok) || TOK_LINEFEED == tok);
3380     empty_arg:
3381                 /* handle '()' case */
3382                 if (!args && !sa && tok == ')')
3383                     break;
3384                 if (!sa)
3385                     tcc_error("macro '%s' used with too many args",
3386                           get_tok_str(s->v, 0));
3387                 tok_str_new(&str);
3388                 parlevel = spc = 0;
3389                 /* NOTE: non zero sa->t indicates VA_ARGS */
3390                 while ((parlevel > 0 ||
3391                         (tok != ')' &&
3392                          (tok != ',' || sa->type.t)))) {
3393                     if (tok == TOK_EOF || tok == 0)
3394                         break;
3395                     if (tok == '(')
3396                         parlevel++;
3397                     else if (tok == ')')
3398                         parlevel--;
3399                     if (tok == TOK_LINEFEED)
3400                         tok = ' ';
3401                     if (!check_space(tok, &spc))
3402                         tok_str_add2(&str, tok, &tokc);
3403                     next_argstream(nested_list, NULL);
3404                 }
3405                 if (parlevel)
3406                     expect(")");
3407                 str.len -= spc;
3408                 tok_str_add(&str, -1);
3409                 tok_str_add(&str, 0);
3410                 sa1 = sym_push2(&args, sa->v & ~SYM_FIELD, sa->type.t, 0);
3411                 sa1->d = str.str;
3412                 sa = sa->next;
3413                 if (tok == ')') {
3414                     /* special case for gcc var args: add an empty
3415                        var arg argument if it is omitted */
3416                     if (sa && sa->type.t && gnu_ext)
3417                         goto empty_arg;
3418                     break;
3419                 }
3420                 if (tok != ',')
3421                     expect(",");
3422             }
3423             if (sa) {
3424                 tcc_error("macro '%s' used with too few args",
3425                       get_tok_str(s->v, 0));
3426             }
3427 
3428             parse_flags = saved_parse_flags;
3429 
3430             /* now subst each arg */
3431             mstr = macro_arg_subst(nested_list, mstr, args);
3432             /* free memory */
3433             sa = args;
3434             while (sa) {
3435                 sa1 = sa->prev;
3436                 tok_str_free_str(sa->d);
3437                 if (sa->next) {
3438                     tok_str_free_str(sa->next->d);
3439                     sym_free(sa->next);
3440                 }
3441                 sym_free(sa);
3442                 sa = sa1;
3443             }
3444         }
3445 
3446         sym_push2(nested_list, s->v, 0, 0);
3447         parse_flags = saved_parse_flags;
3448         joined_str = macro_twosharps(mstr);
3449         macro_subst(tok_str, nested_list, joined_str ? joined_str : mstr);
3450 
3451         /* pop nested defined symbol */
3452         sa1 = *nested_list;
3453         *nested_list = sa1->prev;
3454         sym_free(sa1);
3455 	if (joined_str)
3456 	    tok_str_free_str(joined_str);
3457         if (mstr != s->d)
3458             tok_str_free_str(mstr);
3459     }
3460     return 0;
3461 }
3462 
3463 /* do macro substitution of macro_str and add result to
3464    (tok_str,tok_len). 'nested_list' is the list of all macros we got
3465    inside to avoid recursing. */
macro_subst( TokenString *tok_str, Sym **nested_list, const int *macro_str )3466 static void macro_subst(
3467     TokenString *tok_str,
3468     Sym **nested_list,
3469     const int *macro_str
3470     )
3471 {
3472     Sym *s;
3473     int t, spc, nosubst;
3474     CValue cval;
3475 
3476     spc = nosubst = 0;
3477 
3478     while (1) {
3479         TOK_GET(&t, &macro_str, &cval);
3480         if (t <= 0)
3481             break;
3482 
3483         if (t >= TOK_IDENT && 0 == nosubst) {
3484             s = define_find(t);
3485             if (s == NULL)
3486                 goto no_subst;
3487 
3488             /* if nested substitution, do nothing */
3489             if (sym_find2(*nested_list, t)) {
3490                 /* and mark it as TOK_NOSUBST, so it doesn't get subst'd again */
3491                 tok_str_add2(tok_str, TOK_NOSUBST, NULL);
3492                 goto no_subst;
3493             }
3494 
3495             {
3496                 TokenString *str = tok_str_alloc();
3497                 str->str = (int*)macro_str;
3498                 begin_macro(str, 2);
3499 
3500                 tok = t;
3501                 macro_subst_tok(tok_str, nested_list, s);
3502 
3503                 if (macro_stack != str) {
3504                     /* already finished by reading function macro arguments */
3505                     break;
3506                 }
3507 
3508                 macro_str = macro_ptr;
3509                 end_macro ();
3510             }
3511             if (tok_str->len)
3512                 spc = is_space(t = tok_str->str[tok_str->lastlen]);
3513         } else {
3514             if (t == '\\' && !(parse_flags & PARSE_FLAG_ACCEPT_STRAYS))
3515                 tcc_error("stray '\\' in program");
3516 no_subst:
3517             if (!check_space(t, &spc))
3518                 tok_str_add2(tok_str, t, &cval);
3519 
3520             if (nosubst) {
3521                 if (nosubst > 1 && (spc || (++nosubst == 3 && t == '(')))
3522                     continue;
3523                 nosubst = 0;
3524             }
3525             if (t == TOK_NOSUBST)
3526                 nosubst = 1;
3527         }
3528         /* GCC supports 'defined' as result of a macro substitution */
3529         if (t == TOK_DEFINED && pp_expr)
3530             nosubst = 2;
3531     }
3532 }
3533 
3534 /* return next token with macro substitution */
next(void)3535 ST_FUNC void next(void)
3536 {
3537  redo:
3538     if (parse_flags & PARSE_FLAG_SPACES)
3539         next_nomacro_spc();
3540     else
3541         next_nomacro();
3542 
3543     if (macro_ptr) {
3544         if (tok == TOK_NOSUBST || tok == TOK_PLCHLDR) {
3545         /* discard preprocessor markers */
3546             goto redo;
3547         } else if (tok == 0) {
3548             /* end of macro or unget token string */
3549             end_macro();
3550             goto redo;
3551         }
3552     } else if (tok >= TOK_IDENT && (parse_flags & PARSE_FLAG_PREPROCESS)) {
3553         Sym *s;
3554         /* if reading from file, try to substitute macros */
3555         s = define_find(tok);
3556         if (s) {
3557             Sym *nested_list = NULL;
3558             tokstr_buf.len = 0;
3559             macro_subst_tok(&tokstr_buf, &nested_list, s);
3560             tok_str_add(&tokstr_buf, 0);
3561             begin_macro(&tokstr_buf, 0);
3562             goto redo;
3563         }
3564     }
3565     /* convert preprocessor tokens into C tokens */
3566     if (tok == TOK_PPNUM) {
3567         if  (parse_flags & PARSE_FLAG_TOK_NUM)
3568             parse_number((char *)tokc.str.data);
3569     } else if (tok == TOK_PPSTR) {
3570         if (parse_flags & PARSE_FLAG_TOK_STR)
3571             parse_string((char *)tokc.str.data, tokc.str.size - 1);
3572     }
3573 }
3574 
3575 /* push back current token and set current token to 'last_tok'. Only
3576    identifier case handled for labels. */
unget_tok(int last_tok)3577 ST_INLN void unget_tok(int last_tok)
3578 {
3579 
3580     TokenString *str = tok_str_alloc();
3581     tok_str_add2(str, tok, &tokc);
3582     tok_str_add(str, 0);
3583     begin_macro(str, 1);
3584     tok = last_tok;
3585 }
3586 
preprocess_start(TCCState *s1, int is_asm)3587 ST_FUNC void preprocess_start(TCCState *s1, int is_asm)
3588 {
3589     CString cstr;
3590 
3591     tccpp_new(s1);
3592 
3593     s1->include_stack_ptr = s1->include_stack;
3594     s1->ifdef_stack_ptr = s1->ifdef_stack;
3595     file->ifdef_stack_ptr = s1->ifdef_stack_ptr;
3596     pp_expr = 0;
3597     pp_counter = 0;
3598     pp_debug_tok = pp_debug_symv = 0;
3599     pp_once++;
3600     s1->pack_stack[0] = 0;
3601     s1->pack_stack_ptr = s1->pack_stack;
3602 
3603     set_idnum('$', s1->dollars_in_identifiers ? IS_ID : 0);
3604     set_idnum('.', is_asm ? IS_ID : 0);
3605 
3606     cstr_new(&cstr);
3607     if (s1->cmdline_defs.size)
3608         cstr_cat(&cstr, s1->cmdline_defs.data, s1->cmdline_defs.size);
3609     cstr_printf(&cstr, "#define __BASE_FILE__ \"%s\"\n", file->filename);
3610     if (is_asm)
3611         cstr_printf(&cstr, "#define __ASSEMBLER__ 1\n");
3612     if (s1->output_type == TCC_OUTPUT_MEMORY)
3613         cstr_printf(&cstr, "#define __TCC_RUN__ 1\n");
3614     if (!is_asm && s1->output_type != TCC_OUTPUT_PREPROCESS)
3615         cstr_cat(&cstr, "#include \"tcc_predefs.h\"\n", -1);
3616     if (s1->cmdline_incl.size)
3617         cstr_cat(&cstr, s1->cmdline_incl.data, s1->cmdline_incl.size);
3618     //printf("%s\n", (char*)cstr.data);
3619     *s1->include_stack_ptr++ = file;
3620     tcc_open_bf(s1, "<command line>", cstr.size);
3621     memcpy(file->buffer, cstr.data, cstr.size);
3622     cstr_free(&cstr);
3623 
3624     parse_flags = is_asm ? PARSE_FLAG_ASM_FILE : 0;
3625     tok_flags = TOK_FLAG_BOL | TOK_FLAG_BOF;
3626 }
3627 
3628 /* cleanup from error/setjmp */
preprocess_end(TCCState *s1)3629 ST_FUNC void preprocess_end(TCCState *s1)
3630 {
3631     while (macro_stack)
3632         end_macro();
3633     macro_ptr = NULL;
3634     while (file)
3635         tcc_close();
3636     tccpp_delete(s1);
3637 }
3638 
tccpp_new(TCCState *s)3639 ST_FUNC void tccpp_new(TCCState *s)
3640 {
3641     int i, c;
3642     const char *p, *r;
3643 
3644     /* init isid table */
3645     for(i = CH_EOF; i<128; i++)
3646         set_idnum(i,
3647             is_space(i) ? IS_SPC
3648             : isid(i) ? IS_ID
3649             : isnum(i) ? IS_NUM
3650             : 0);
3651 
3652     for(i = 128; i<256; i++)
3653         set_idnum(i, IS_ID);
3654 
3655     /* init allocators */
3656     tal_new(&toksym_alloc, TOKSYM_TAL_LIMIT, TOKSYM_TAL_SIZE);
3657     tal_new(&tokstr_alloc, TOKSTR_TAL_LIMIT, TOKSTR_TAL_SIZE);
3658 
3659     memset(hash_ident, 0, TOK_HASH_SIZE * sizeof(TokenSym *));
3660     memset(s->cached_includes_hash, 0, sizeof s->cached_includes_hash);
3661 
3662     cstr_new(&cstr_buf);
3663     cstr_realloc(&cstr_buf, STRING_MAX_SIZE);
3664     tok_str_new(&tokstr_buf);
3665     tok_str_realloc(&tokstr_buf, TOKSTR_MAX_SIZE);
3666 
3667     tok_ident = TOK_IDENT;
3668     p = tcc_keywords;
3669     while (*p) {
3670         r = p;
3671         for(;;) {
3672             c = *r++;
3673             if (c == '\0')
3674                 break;
3675         }
3676         tok_alloc(p, r - p - 1);
3677         p = r;
3678     }
3679 
3680     /* we add dummy defines for some special macros to speed up tests
3681        and to have working defined() */
3682     define_push(TOK___LINE__, MACRO_OBJ, NULL, NULL);
3683     define_push(TOK___FILE__, MACRO_OBJ, NULL, NULL);
3684     define_push(TOK___DATE__, MACRO_OBJ, NULL, NULL);
3685     define_push(TOK___TIME__, MACRO_OBJ, NULL, NULL);
3686     define_push(TOK___COUNTER__, MACRO_OBJ, NULL, NULL);
3687 }
3688 
tccpp_delete(TCCState *s)3689 ST_FUNC void tccpp_delete(TCCState *s)
3690 {
3691     int i, n;
3692 
3693     dynarray_reset(&s->cached_includes, &s->nb_cached_includes);
3694 
3695     /* free tokens */
3696     n = tok_ident - TOK_IDENT;
3697     if (n > total_idents)
3698         total_idents = n;
3699     for(i = 0; i < n; i++)
3700         tal_free(toksym_alloc, table_ident[i]);
3701     tcc_free(table_ident);
3702     table_ident = NULL;
3703 
3704     /* free static buffers */
3705     cstr_free(&tokcstr);
3706     cstr_free(&cstr_buf);
3707     cstr_free(&macro_equal_buf);
3708     tok_str_free_str(tokstr_buf.str);
3709 
3710     /* free allocators */
3711     tal_delete(toksym_alloc);
3712     toksym_alloc = NULL;
3713     tal_delete(tokstr_alloc);
3714     tokstr_alloc = NULL;
3715 }
3716 
3717 /* ------------------------------------------------------------------------- */
/* tcc -E [-P[1]] [-dD] support */
3719 
tok_print(const char *msg, const int *str)3720 static void tok_print(const char *msg, const int *str)
3721 {
3722     FILE *fp;
3723     int t, s = 0;
3724     CValue cval;
3725 
3726     fp = tcc_state->ppfp;
3727     fprintf(fp, "%s", msg);
3728     while (str) {
3729 	TOK_GET(&t, &str, &cval);
3730 	if (!t)
3731 	    break;
3732 	fprintf(fp, " %s" + s, get_tok_str(t, &cval)), s = 1;
3733     }
3734     fprintf(fp, "\n");
3735 }
3736 
/* Emit what is needed to keep the line numbers of the preprocessed
   output in sync with 'f': a few empty lines for a small gap inside
   the same file (level == 0), otherwise a line marker in the format
   selected by s1->Pflag.  A positive/negative 'level' adds the flag
   " 1"/" 2" to the marker.  Does nothing when bit 4 of s1->dflag is
   set. */
static void pp_line(TCCState *s1, BufferedFile *f, int level)
{
    if (s1->dflag & 4)
        return;

    if (s1->Pflag != LINE_MACRO_OUTPUT_FORMAT_NONE) {
        int gap = f->line_num - f->line_ref;

        if (level == 0 && f->line_ref && gap < 8) {
            /* small gap: pad with empty lines */
            for (; gap > 0; --gap)
                fputs("\n", s1->ppfp);
        } else if (s1->Pflag == LINE_MACRO_OUTPUT_FORMAT_STD) {
            fprintf(s1->ppfp, "#line %d \"%s\"\n", f->line_num, f->filename);
        } else {
            const char *flag = "";

            if (level > 0)
                flag = " 1";
            else if (level < 0)
                flag = " 2";
            fprintf(s1->ppfp, "# %d \"%s\"%s\n", f->line_num, f->filename,
                flag);
        }
    }
    /* the output is now in sync with this line */
    f->line_ref = f->line_num;
}
3757 
/* Print the definition of the macro with token 'v' as a '#define'
   line to the preprocessor output.  Nothing is printed if 'v' is not
   defined or has no replacement list. */
static void define_print(TCCState *s1, int v)
{
    FILE *out;
    Sym *def = define_find(v);

    if (!def || !def->d)
        return;

    out = s1->ppfp;
    fprintf(out, "#define %s", get_tok_str(v, NULL));
    if (def->type.t == MACRO_FUNC) {
        Sym *param;

        /* comma separated parameter list */
        fprintf(out,"(");
        for (param = def->next; param; param = param->next) {
            fprintf(out,"%s", get_tok_str(param->v & ~SYM_FIELD, NULL));
            if (param->next)
                fprintf(out,",");
        }
        fprintf(out,")");
    }
    /* replacement list and end of line */
    tok_print("", def->d);
}
3783 
/* Echo the directive recorded in pp_debug_tok / pp_debug_symv
   (#define, #undef, #pragma push_macro / pop_macro) to the
   preprocessor output, then clear pp_debug_tok. */
static void pp_debug_defines(TCCState *s1)
{
    const int directive = pp_debug_tok;
    int sym;
    const char *name;
    FILE *out;

    if (directive == 0)
        return;

    /* sync the output as of the previous line, then mark the current
       line as already accounted for */
    file->line_num--;
    pp_line(s1, file, 0);
    file->line_ref = ++file->line_num;

    out = s1->ppfp;
    sym = pp_debug_symv;
    name = get_tok_str(sym, NULL);
    switch (directive) {
    case TOK_DEFINE:
        define_print(s1, sym);
        break;
    case TOK_UNDEF:
        fprintf(out, "#undef %s\n", name);
        break;
    case TOK_push_macro:
        fprintf(out, "#pragma push_macro(\"%s\")\n", name);
        break;
    case TOK_pop_macro:
        fprintf(out, "#pragma pop_macro(\"%s\")\n", name);
        break;
    default:
        break;
    }
    pp_debug_tok = 0;
}
3812 
/* Run define_print() over every token id in [TOK_IDENT, tok_ident).
   define_print() itself ignores ids that have no macro definition, so
   only the macros defined at the time of the call are written out. */
static void pp_debug_builtins(TCCState *s1)
{
    int id = TOK_IDENT;

    while (id < tok_ident)
        define_print(s1, id++);
}
3819 
3820 /* Add a space between tokens a and b to avoid unwanted textual pasting */
pp_need_space(int a, int b)3821 static int pp_need_space(int a, int b)
3822 {
3823     return 'E' == a ? '+' == b || '-' == b
3824         : '+' == a ? TOK_INC == b || '+' == b
3825         : '-' == a ? TOK_DEC == b || '-' == b
3826         : a >= TOK_IDENT ? b >= TOK_IDENT
3827 	: a == TOK_PPNUM ? b >= TOK_IDENT
3828         : 0;
3829 }
3830 
3831 /* maybe hex like 0x1e */
pp_check_he0xE(int t, const char *p)3832 static int pp_check_he0xE(int t, const char *p)
3833 {
3834     if (t == TOK_PPNUM && toup(strchr(p, 0)[-1]) == 'E')
3835         return 'E';
3836     return t;
3837 }
3838 
3839 /* Preprocess the current file */
tcc_preprocess(TCCState *s1)3840 ST_FUNC int tcc_preprocess(TCCState *s1)
3841 {
3842     BufferedFile **iptr;
3843     int token_seen, spcs, level;
3844     const char *p;
3845     char white[400];
3846 
3847     parse_flags = PARSE_FLAG_PREPROCESS
3848                 | (parse_flags & PARSE_FLAG_ASM_FILE)
3849                 | PARSE_FLAG_LINEFEED
3850                 | PARSE_FLAG_SPACES
3851                 | PARSE_FLAG_ACCEPT_STRAYS
3852                 ;
3853     /* Credits to Fabrice Bellard's initial revision to demonstrate its
3854        capability to compile and run itself, provided all numbers are
3855        given as decimals. tcc -E -P10 will do. */
3856     if (s1->Pflag == LINE_MACRO_OUTPUT_FORMAT_P10)
3857         parse_flags |= PARSE_FLAG_TOK_NUM, s1->Pflag = 1;
3858 
3859 #ifdef PP_BENCH
3860     /* for PP benchmarks */
3861     do next(); while (tok != TOK_EOF);
3862     return 0;
3863 #endif
3864 
3865     if (s1->dflag & 1) {
3866         pp_debug_builtins(s1);
3867         s1->dflag &= ~1;
3868     }
3869 
3870     token_seen = TOK_LINEFEED, spcs = 0;
3871     pp_line(s1, file, 0);
3872     for (;;) {
3873         iptr = s1->include_stack_ptr;
3874         next();
3875         if (tok == TOK_EOF)
3876             break;
3877 
3878         level = s1->include_stack_ptr - iptr;
3879         if (level) {
3880             if (level > 0)
3881                 pp_line(s1, *iptr, 0);
3882             pp_line(s1, file, level);
3883         }
3884         if (s1->dflag & 7) {
3885             pp_debug_defines(s1);
3886             if (s1->dflag & 4)
3887                 continue;
3888         }
3889 
3890         if (is_space(tok)) {
3891             if (spcs < sizeof white - 1)
3892                 white[spcs++] = tok;
3893             continue;
3894         } else if (tok == TOK_LINEFEED) {
3895             spcs = 0;
3896             if (token_seen == TOK_LINEFEED)
3897                 continue;
3898             ++file->line_ref;
3899         } else if (token_seen == TOK_LINEFEED) {
3900             pp_line(s1, file, 0);
3901         } else if (spcs == 0 && pp_need_space(token_seen, tok)) {
3902             white[spcs++] = ' ';
3903         }
3904 
3905         white[spcs] = 0, fputs(white, s1->ppfp), spcs = 0;
3906         fputs(p = get_tok_str(tok, &tokc), s1->ppfp);
3907         token_seen = pp_check_he0xE(tok, p);
3908     }
3909     return 0;
3910 }
3911 
3912 /* ------------------------------------------------------------------------- */
3913