/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifdef TARGET_DEFS_ONLY

/* number of available registers */
#define NB_REGS         25
#define NB_ASM_REGS     16
#define CONFIG_TCC_ASM

/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (the gv2() code makes
   assumptions about this ordering). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_RAX     0x0004
#define RC_RCX     0x0008
#define RC_RDX     0x0010
#define RC_ST0     0x0080 /* only for long double */
#define RC_R8      0x0100
#define RC_R9      0x0200
#define RC_R10     0x0400
#define RC_R11     0x0800
#define RC_XMM0    0x1000
#define RC_XMM1    0x2000
#define RC_XMM2    0x4000
#define RC_XMM3    0x8000
#define RC_XMM4    0x10000
#define RC_XMM5    0x20000
#define RC_XMM6    0x40000
#define RC_XMM7    0x80000
#define RC_IRET    RC_RAX /* function return: integer register */
#define RC_IRE2    RC_RDX /* function return: second integer register */
#define RC_FRET    RC_XMM0 /* function return: float register */
#define RC_FRE2    RC_XMM1 /* function return: second float register */
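
/* For illustration (not part of the original code): a value can be
   constrained loosely or tightly through these classes, e.g.

       r = get_reg(RC_INT);   // any free general purpose register
       r = gv(RC_RCX);        // specifically %rcx (e.g. for shift counts)

   Both requests can be satisfied by TREG_RCX, because
   reg_classes[TREG_RCX] below is RC_INT | RC_RCX. */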

/* pretty names for the registers */
enum {
    TREG_RAX = 0,
    TREG_RCX = 1,
    TREG_RDX = 2,
    TREG_RSP = 4,
    TREG_RSI = 6,
    TREG_RDI = 7,

    TREG_R8  = 8,
    TREG_R9  = 9,
    TREG_R10 = 10,
    TREG_R11 = 11,

    TREG_XMM0 = 16,
    TREG_XMM1 = 17,
    TREG_XMM2 = 18,
    TREG_XMM3 = 19,
    TREG_XMM4 = 20,
    TREG_XMM5 = 21,
    TREG_XMM6 = 22,
    TREG_XMM7 = 23,

    TREG_ST0 = 24,

    TREG_MEM = 0x20
};

#define REX_BASE(reg) (((reg) >> 3) & 1)
#define REG_VALUE(reg) ((reg) & 7)
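
/* Example: TREG_R10 is 10 (binary 1010), so REX_BASE() yields 1 (the
   extension bit emitted in the REX prefix) and REG_VALUE() yields 2
   (the low 3 bits encoded in the ModRM or opcode byte). */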

/* return registers for function */
#define REG_IRET TREG_RAX /* single word int return register */
#define REG_IRE2 TREG_RDX /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */
#define REG_FRE2 TREG_XMM1 /* second float return register */

/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */
#define PTR_SIZE 8

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 16
/* maximum alignment (for aligned attribute support) */
#define MAX_ALIGN     16

/* define if return values need to be extended explicitly
   at caller side (for interfacing with non-TCC compilers) */
#define PROMOTE_RET
/******************************************************/
#else /* ! TARGET_DEFS_ONLY */
/******************************************************/
#define USING_GLOBALS
#include "tcc.h"
#include <assert.h>

ST_DATA const int reg_classes[NB_REGS] = {
    /* eax */ RC_INT | RC_RAX,
    /* ecx */ RC_INT | RC_RCX,
    /* edx */ RC_INT | RC_RDX,
    0,
    0,
    0,
    0,
    0,
    RC_R8,
    RC_R9,
    RC_R10,
    RC_R11,
    0,
    0,
    0,
    0,
    /* xmm0 */ RC_FLOAT | RC_XMM0,
    /* xmm1 */ RC_FLOAT | RC_XMM1,
    /* xmm2 */ RC_FLOAT | RC_XMM2,
    /* xmm3 */ RC_FLOAT | RC_XMM3,
    /* xmm4 */ RC_FLOAT | RC_XMM4,
    /* xmm5 */ RC_FLOAT | RC_XMM5,
    /* xmm6 and xmm7 are included so gv() can be used on them,
       but they are not tagged with RC_FLOAT because they are
       callee saved on Windows */
    RC_XMM6,
    RC_XMM7,
    /* st0 */ RC_ST0
};

static unsigned long func_sub_sp_offset;
static int func_ret_sub;

#if defined(CONFIG_TCC_BCHECK)
static addr_t func_bound_offset;
static unsigned long func_bound_ind;
static int func_bound_alloca_used;
#endif

#ifdef TCC_TARGET_PE
static int func_scratch, func_alloca;
#endif

/* XXX: make it faster ? */
ST_FUNC void g(int c)
{
    int ind1;
    if (nocode_wanted)
        return;
    ind1 = ind + 1;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    cur_text_section->data[ind] = c;
    ind = ind1;
}

ST_FUNC void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}
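
/* Note on o(): bytes are emitted low-order first until the value
   becomes zero, so multi-byte opcodes are written with their first
   byte in the least significant position; e.g. o(0xb60f) emits
   0x0f 0xb6 (movzbl).  A zero byte cannot be emitted this way, which
   is why single bytes (and zeros) go through g(). */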

ST_FUNC void gen_le16(int v)
{
    g(v);
    g(v >> 8);
}

ST_FUNC void gen_le32(int c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
}

ST_FUNC void gen_le64(int64_t c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
    g(c >> 32);
    g(c >> 40);
    g(c >> 48);
    g(c >> 56);
}

static void orex(int ll, int r, int r2, int b)
{
    if ((r & VT_VALMASK) >= VT_CONST)
        r = 0;
    if ((r2 & VT_VALMASK) >= VT_CONST)
        r2 = 0;
    if (ll || REX_BASE(r) || REX_BASE(r2))
        o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
    o(b);
}
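
/* Example: orex(1, TREG_R8, 0, 0x8b) emits the REX prefix
   0x40 | REX.W | REX.B = 0x49 followed by the opcode 0x8b, i.e. the
   start of a 64 bit "mov" whose base register is one of %r8-%r15. */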

/* output a symbol and patch all calls to it */
ST_FUNC void gsym_addr(int t, int a)
{
    while (t) {
        unsigned char *ptr = cur_text_section->data + t;
        uint32_t n = read32le(ptr); /* next value */
        write32le(ptr, a < 0 ? -a : a - t - 4);
        t = n;
    }
}
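
/* The pending-jump chain works as follows: each unresolved 32 bit
   displacement slot holds the offset of the next unresolved slot
   (0 terminates the chain), and gsym_addr() rewrites every slot as a
   displacement relative to 'a'.  A negative 'a' stores the absolute
   value -a instead; the PE gfunc_epilog() below uses that to patch
   the alloca scratch-size constants. */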

static int is64_type(int t)
{
    return ((t & VT_BTYPE) == VT_PTR ||
            (t & VT_BTYPE) == VT_FUNC ||
            (t & VT_BTYPE) == VT_LLONG);
}

/* instruction + 4 bytes data. Return the address of the data */
static int oad(int c, int s)
{
    int t;
    if (nocode_wanted)
        return s;
    o(c);
    t = ind;
    gen_le32(s);
    return t;
}
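
/* Example: oad(0xe8, 0) emits a call with a zero 32 bit displacement
   and returns the offset of that displacement, so the caller can
   attach a relocation to it (as gen_bounds_call() does below) or
   chain it as a forward jump for gsym_addr(). */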

/* generate jmp to a label */
#define gjmp2(instr,lbl) oad(instr,lbl)

ST_FUNC void gen_addr32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloca(cur_text_section, sym, ind, R_X86_64_32S, c), c=0;
    gen_le32(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
{
    if (r & VT_SYM)
        greloca(cur_text_section, sym, ind, R_X86_64_64, c), c=0;
    gen_le64(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloca(cur_text_section, sym, ind, R_X86_64_PC32, c-4), c=4;
    gen_le32(c-4);
}
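
/* The 'c - 4' accounts for pc-relative displacements being relative
   to the end of the instruction: the 4 displacement bytes themselves
   still follow the location being patched. */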

/* output got address with relocation */
static void gen_gotpcrel(int r, Sym *sym, int c)
{
#ifdef TCC_TARGET_PE
    tcc_error("internal error: no GOT on PE: %s %x %x | %02x %02x %02x\n",
        get_tok_str(sym->v, NULL), c, r,
        cur_text_section->data[ind-3],
        cur_text_section->data[ind-2],
        cur_text_section->data[ind-1]
        );
#endif
    greloca(cur_text_section, sym, ind, R_X86_64_GOTPCREL, -4);
    gen_le32(0);
    if (c) {
        /* we use add c, %xxx for displacement */
        orex(1, r, 0, 0x81);
        o(0xc0 + REG_VALUE(r));
        gen_le32(c);
    }
}

static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
{
    op_reg = REG_VALUE(op_reg) << 3;
    if ((r & VT_VALMASK) == VT_CONST) {
        /* constant memory reference */
        if (!(r & VT_SYM)) {
            /* Absolute memory reference */
            o(0x04 | op_reg); /* [sib] | destreg */
            oad(0x25, c);     /* disp32 */
        } else {
            o(0x05 | op_reg); /* (%rip)+disp32 | destreg */
            if (is_got) {
                gen_gotpcrel(r, sym, c);
            } else {
                gen_addrpc32(r, sym, c);
            }
        }
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
        /* currently, we use only ebp as base */
        if (c == (char)c) {
            /* short reference */
            o(0x45 | op_reg);
            g(c);
        } else {
            oad(0x85 | op_reg, c);
        }
    } else if ((r & VT_VALMASK) >= TREG_MEM) {
        if (c) {
            g(0x80 | op_reg | REG_VALUE(r));
            gen_le32(c);
        } else {
            g(0x00 | op_reg | REG_VALUE(r));
        }
    } else {
        g(0x00 | op_reg | REG_VALUE(r));
    }
}
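
/* Worked example: together with a preceding 0x8b opcode,
   gen_modrm(TREG_RAX, VT_LOCAL, NULL, -8) emits 0x45 0xf8, the ModRM
   byte for disp8(%rbp) plus the displacement, i.e. "mov -8(%rbp),%eax". */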

/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm(int op_reg, int r, Sym *sym, int c)
{
    gen_modrm_impl(op_reg, r, sym, c, 0);
}

/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
{
    int is_got;
    is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
    orex(1, r, op_reg, opcode);
    gen_modrm_impl(op_reg, r, sym, c, is_got);
}


/* load 'r' from value 'sv' */
void load(int r, SValue *sv)
{
    int v, t, ft, fc, fr;
    SValue v1;

#ifdef TCC_TARGET_PE
    SValue v2;
    sv = pe_getimport(sv, &v2);
#endif

    fr = sv->r;
    ft = sv->type.t & ~VT_DEFSIGN;
    fc = sv->c.i;
    if (fc != sv->c.i && (fr & VT_SYM))
        tcc_error("64 bit addend in load");

    ft &= ~(VT_VOLATILE | VT_CONSTANT);

#ifndef TCC_TARGET_PE
    /* we use indirect access via got */
    if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
        (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
        /* use the result register as a temporary register */
        int tr = r | TREG_MEM;
        if (is_float(ft)) {
            /* we cannot use float registers as a temporary register */
            tr = get_reg(RC_INT) | TREG_MEM;
        }
        gen_modrm64(0x8b, tr, fr, sv->sym, 0);

        /* load from the temporary register */
        fr = tr | VT_LVAL;
    }
#endif

    v = fr & VT_VALMASK;
    if (fr & VT_LVAL) {
        int b, ll;
        if (v == VT_LLOCAL) {
            v1.type.t = VT_PTR;
            v1.r = VT_LOCAL | VT_LVAL;
            v1.c.i = fc;
            fr = r;
            if (!(reg_classes[fr] & (RC_INT|RC_R11)))
                fr = get_reg(RC_INT);
            load(fr, &v1);
        }
        if (fc != sv->c.i) {
            /* If the addend doesn't fit into a signed 32 bit value we
               must use a 64 bit move.  We've checked above that in
               that case no sym is associated.  */
            v1.type.t = VT_LLONG;
            v1.r = VT_CONST;
            v1.c.i = sv->c.i;
            fr = r;
            if (!(reg_classes[fr] & (RC_INT|RC_R11)))
                fr = get_reg(RC_INT);
            load(fr, &v1);
            fc = 0;
        }
        ll = 0;
        /* Like GCC we can load from small enough properly sized
           structs and unions as well.
           XXX maybe move to generic operand handling, but should
           occur only with asm, so tccasm.c might also be a better place */
        if ((ft & VT_BTYPE) == VT_STRUCT) {
            int align;
            switch (type_size(&sv->type, &align)) {
                case 1: ft = VT_BYTE; break;
                case 2: ft = VT_SHORT; break;
                case 4: ft = VT_INT; break;
                case 8: ft = VT_LLONG; break;
                default:
                    tcc_error("invalid aggregate type for register load");
                    break;
            }
        }
        if ((ft & VT_BTYPE) == VT_FLOAT) {
            b = 0x6e0f66;
            r = REG_VALUE(r); /* movd */
        } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
            b = 0x7e0ff3; /* movq */
            r = REG_VALUE(r);
        } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
            b = 0xdb, r = 5; /* fldt */
        } else if ((ft & VT_TYPE) == VT_BYTE || (ft & VT_TYPE) == VT_BOOL) {
            b = 0xbe0f;   /* movsbl */
        } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
            b = 0xb60f;   /* movzbl */
        } else if ((ft & VT_TYPE) == VT_SHORT) {
            b = 0xbf0f;   /* movswl */
        } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
            b = 0xb70f;   /* movzwl */
        } else {
            assert(((ft & VT_BTYPE) == VT_INT)
                   || ((ft & VT_BTYPE) == VT_LLONG)
                   || ((ft & VT_BTYPE) == VT_PTR)
                   || ((ft & VT_BTYPE) == VT_FUNC)
                );
            ll = is64_type(ft);
            b = 0x8b;
        }
        if (ll) {
            gen_modrm64(b, r, fr, sv->sym, fc);
        } else {
            orex(ll, fr, r, b);
            gen_modrm(r, fr, sv->sym, fc);
        }
    } else {
        if (v == VT_CONST) {
            if (fr & VT_SYM) {
#ifdef TCC_TARGET_PE
                orex(1,0,r,0x8d);
                o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                gen_addrpc32(fr, sv->sym, fc);
#else
                if (sv->sym->type.t & VT_STATIC) {
                    orex(1,0,r,0x8d);
                    o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                    gen_addrpc32(fr, sv->sym, fc);
                } else {
                    orex(1,0,r,0x8b);
                    o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
                    gen_gotpcrel(r, sv->sym, fc);
                }
#endif
            } else if (is64_type(ft)) {
                orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le64(sv->c.i);
            } else {
                orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le32(fc);
            }
        } else if (v == VT_LOCAL) {
            orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
            gen_modrm(r, VT_LOCAL, sv->sym, fc);
        } else if (v == VT_CMP) {
            if (fc & 0x100) {
                v = vtop->cmp_r;
                fc &= ~0x100;
                /* This was a float compare.  If the parity bit is
                   set the result was unordered, meaning false for everything
                   except TOK_NE, and true for TOK_NE.  */
                orex(0, r, 0, 0xb0 + REG_VALUE(r)); /* mov $0/1,%al */
                g(v ^ fc ^ (v == TOK_NE));
                o(0x037a + (REX_BASE(r) << 8));
            }
            orex(0,r,0, 0x0f); /* setxx %br */
            o(fc);
            o(0xc0 + REG_VALUE(r));
            orex(0,r,0, 0x0f);
            o(0xc0b6 + REG_VALUE(r) * 0x900); /* movzbl %al, %eax */
        } else if (v == VT_JMP || v == VT_JMPI) {
            t = v & 1;
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
            o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
            gsym(fc);
            orex(0,r,0,0);
            oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
        } else if (v != r) {
            if ((r >= TREG_XMM0) && (r <= TREG_XMM7)) {
                if (v == TREG_ST0) {
                    /* gen_cvt_ftof(VT_DOUBLE); */
                    o(0xf0245cdd); /* fstpl -0x10(%rsp) */
                    /* movsd -0x10(%rsp),%xmmN */
                    o(0x100ff2);
                    o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                    o(0xf024);
                } else {
                    assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                    if ((ft & VT_BTYPE) == VT_FLOAT) {
                        o(0x100ff3);
                    } else {
                        assert((ft & VT_BTYPE) == VT_DOUBLE);
                        o(0x100ff2);
                    }
                    o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8);
                }
            } else if (r == TREG_ST0) {
                assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                /* gen_cvt_ftof(VT_LDOUBLE); */
                /* movsd %xmmN,-0x10(%rsp) */
                o(0x110ff2);
                o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                o(0xf024);
                o(0xf02444dd); /* fldl -0x10(%rsp) */
            } else {
                orex(is64_type(ft), r, v, 0x89);
                o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
            }
        }
    }
}

/* store register 'r' in lvalue 'v' */
void store(int r, SValue *v)
{
    int fr, bt, ft, fc;
    int op64 = 0;
    /* store the REX prefix in this variable when PIC is enabled */
    int pic = 0;

#ifdef TCC_TARGET_PE
    SValue v2;
    v = pe_getimport(v, &v2);
#endif

    fr = v->r & VT_VALMASK;
    ft = v->type.t;
    fc = v->c.i;
    if (fc != v->c.i && (fr & VT_SYM))
        tcc_error("64 bit addend in store");
    ft &= ~(VT_VOLATILE | VT_CONSTANT);
    bt = ft & VT_BTYPE;

#ifndef TCC_TARGET_PE
    /* we need to access the variable via got */
    if (fr == VT_CONST && (v->r & VT_SYM)) {
        /* mov xx(%rip), %r11 */
        o(0x1d8b4c);
        gen_gotpcrel(TREG_R11, v->sym, v->c.i);
        pic = is64_type(bt) ? 0x49 : 0x41;
    }
#endif

    /* XXX: incorrect if float reg to reg */
    if (bt == VT_FLOAT) {
        o(0x66);
        o(pic);
        o(0x7e0f); /* movd */
        r = REG_VALUE(r);
    } else if (bt == VT_DOUBLE) {
        o(0x66);
        o(pic);
        o(0xd60f); /* movq */
        r = REG_VALUE(r);
    } else if (bt == VT_LDOUBLE) {
        o(0xc0d9); /* fld %st(0) */
        o(pic);
        o(0xdb); /* fstpt */
        r = 7;
    } else {
        if (bt == VT_SHORT)
            o(0x66);
        o(pic);
        if (bt == VT_BYTE || bt == VT_BOOL)
            orex(0, 0, r, 0x88);
        else if (is64_type(bt))
            op64 = 0x89;
        else
            orex(0, 0, r, 0x89);
    }
    if (pic) {
        /* xxx r, (%r11) where xxx is mov, movq, fld, etc. */
        if (op64)
            o(op64);
        o(3 + (r << 3));
    } else if (op64) {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm64(op64, r, v->r, v->sym, fc);
        } else if (fr != r) {
            orex(1, fr, r, op64);
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    } else {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm(r, v->r, v->sym, fc);
        } else if (fr != r) {
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    }
}

/* 'is_jmp' is '1' if it is a jump */
static void gcall_or_jmp(int is_jmp)
{
    int r;
    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST &&
        ((vtop->r & VT_SYM) && (vtop->c.i-4) == (int)(vtop->c.i-4))) {
        /* constant symbolic case -> simple relocation */
#ifdef TCC_TARGET_PE
        greloca(cur_text_section, vtop->sym, ind + 1, R_X86_64_PC32, (int)(vtop->c.i-4));
#else
        greloca(cur_text_section, vtop->sym, ind + 1, R_X86_64_PLT32, (int)(vtop->c.i-4));
#endif
        oad(0xe8 + is_jmp, 0); /* call/jmp im */
#ifdef CONFIG_TCC_BCHECK
        if (tcc_state->do_bounds_check && vtop->sym->v == TOK_alloca)
            func_bound_alloca_used = 1;
#endif
    } else {
        /* otherwise, indirect call */
        r = TREG_R11;
        load(r, vtop);
        o(0x41); /* REX */
        o(0xff); /* call/jmp *r */
        o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
    }
}
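
/* In the indirect case the bytes emitted are 41 ff d3 (call *%r11) or
   41 ff e3 (jmp *%r11): 0x41 is the REX.B prefix selecting %r11, and
   the is_jmp flag turns the ModRM /2 (call) encoding into /4 (jmp). */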

#if defined(CONFIG_TCC_BCHECK)

static void gen_bounds_call(int v)
{
    Sym *sym = external_global_sym(v, &func_old_type);
    oad(0xe8, 0);
#ifdef TCC_TARGET_PE
    greloca(cur_text_section, sym, ind-4, R_X86_64_PC32, -4);
#else
    greloca(cur_text_section, sym, ind-4, R_X86_64_PLT32, -4);
#endif
}

/* generate a bounded pointer addition */
ST_FUNC void gen_bounded_ptr_add(void)
{
    vpush_global_sym(&func_old_type, TOK___bound_ptr_add);
    vrott(3);
    gfunc_call(2);
    vpushi(0);
    /* returned pointer is in rax */
    vtop->r = TREG_RAX | VT_BOUNDED;
    if (nocode_wanted)
        return;
    /* relocation offset of the bounding function call point */
    vtop->c.i = (cur_text_section->reloc->data_offset - sizeof(ElfW(Rela)));
}

/* patch pointer addition in vtop so that pointer dereferencing is
   also tested */
ST_FUNC void gen_bounded_ptr_deref(void)
{
    addr_t func;
    int size, align;
    ElfW(Rela) *rel;
    Sym *sym;

    if (nocode_wanted)
        return;

    size = type_size(&vtop->type, &align);
    switch(size) {
    case  1: func = TOK___bound_ptr_indir1; break;
    case  2: func = TOK___bound_ptr_indir2; break;
    case  4: func = TOK___bound_ptr_indir4; break;
    case  8: func = TOK___bound_ptr_indir8; break;
    case 12: func = TOK___bound_ptr_indir12; break;
    case 16: func = TOK___bound_ptr_indir16; break;
    default:
        /* may happen with struct member access */
        return;
        //tcc_error("unhandled size when dereferencing bounded pointer");
        //func = 0;
        //break;
    }
    sym = external_global_sym(func, &func_old_type);
    if (!sym->c)
        put_extern_sym(sym, NULL, 0, 0);
    /* patch relocation */
    /* XXX: find a better solution ? */
    rel = (ElfW(Rela) *)(cur_text_section->reloc->data + vtop->c.i);
    rel->r_info = ELF64_R_INFO(sym->c, ELF64_R_TYPE(rel->r_info));
}

#ifdef TCC_TARGET_PE
# define TREG_FASTCALL_1 TREG_RCX
#else
# define TREG_FASTCALL_1 TREG_RDI
#endif

static void gen_bounds_prolog(void)
{
    /* leave some room for bound checking code */
    func_bound_offset = lbounds_section->data_offset;
    func_bound_ind = ind;
    func_bound_alloca_used = 0;
    o(0xb848 + TREG_FASTCALL_1 * 0x100); /* lbound section pointer */
    gen_le64 (0);
    oad(0xb8, 0); /* call to function */
}

static void gen_bounds_epilog(void)
{
    addr_t saved_ind;
    addr_t *bounds_ptr;
    Sym *sym_data;

    if (func_bound_offset == lbounds_section->data_offset && !func_bound_alloca_used)
        return;

    /* add end of table info */
    bounds_ptr = section_ptr_add(lbounds_section, sizeof(addr_t));
    *bounds_ptr = 0;

    /* generate bound local allocation */
    sym_data = get_sym_ref(&char_pointer_type, lbounds_section,
                           func_bound_offset, lbounds_section->data_offset);
    saved_ind = ind;
    ind = func_bound_ind;
    greloca(cur_text_section, sym_data, ind + 2, R_X86_64_64, 0);
    ind = ind + 10;
    gen_bounds_call(TOK___bound_local_new);
    ind = saved_ind;

    /* generate bound check local freeing */
    o(0x525051); /* save returned value, if any (+ scratch-space for windows) */
    greloca(cur_text_section, sym_data, ind + 2, R_X86_64_64, 0);
    o(0xb848 + TREG_FASTCALL_1 * 0x100); /* mov xxx, %rcx/di */
    gen_le64 (0);
    gen_bounds_call(TOK___bound_local_delete);
    o(0x59585a); /* restore returned value, if any */
}
#endif

#ifdef TCC_TARGET_PE

#define REGN 4
static const uint8_t arg_regs[REGN] = {
    TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
};

/* Prepare arguments in R10 and R11 rather than RCX and RDX
   because gv() will not ever use these */
static int arg_prepare_reg(int idx) {
    if (idx == 0 || idx == 1)
        /* idx=0: r10, idx=1: r11 */
        return idx + 10;
    else
        return arg_regs[idx];
}
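
/* This maps idx 0 -> %r10, 1 -> %r11, 2 -> %r8, 3 -> %r9: %r10 and
   %r11 stand in for %rcx and %rdx until just before the call, where
   gfunc_call() copies them back into place. */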

/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */

static void gen_offs_sp(int b, int r, int d)
{
    orex(1,0,r & 0x100 ? 0 : r, b);
    if (d == (char)d) {
        o(0x2444 | (REG_VALUE(r) << 3));
        g(d);
    } else {
        o(0x2484 | (REG_VALUE(r) << 3));
        gen_le32(d);
    }
}

static int using_regs(int size)
{
    return !(size > 8 || (size & (size - 1)));
}
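
/* using_regs() is true for sizes 1, 2, 4 and 8: under the Windows x64
   calling convention only power-of-two sizes up to 8 bytes are passed
   in a register or stack slot directly; everything else is passed by
   reference to a caller-made copy. */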

/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize)
{
    int size, align;
    *ret_align = 1; // Never have to re-align return values for x86-64
    *regsize = 8;
    size = type_size(vt, &align);
    if (!using_regs(size))
        return 0;
    if (size == 8)
        ret->t = VT_LLONG;
    else if (size == 4)
        ret->t = VT_INT;
    else if (size == 2)
        ret->t = VT_SHORT;
    else
        ret->t = VT_BYTE;
    ret->ref = NULL;
    return 1;
}
static int is_sse_float(int t) {
    int bt;
    bt = t & VT_BTYPE;
    return bt == VT_DOUBLE || bt == VT_FLOAT;
}

static int gfunc_arg_size(CType *type) {
    int align;
    if (type->t & (VT_ARRAY|VT_BITFIELD))
        return 8;
    return type_size(type, &align);
}

void gfunc_call(int nb_args)
{
    int size, r, args_size, i, d, bt, struct_size;
    int arg;

#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check)
        gbound_args(nb_args);
#endif

    args_size = (nb_args < REGN ? REGN : nb_args) * PTR_SIZE;
    arg = nb_args;

    /* for struct arguments we need to call memcpy, and that call
       would clobber the argument registers we are preparing.  So we
       first process the arguments that will be passed on the stack. */
    struct_size = args_size;
    for(i = 0; i < nb_args; i++) {
        SValue *sv;

        --arg;
        sv = &vtop[-i];
        bt = (sv->type.t & VT_BTYPE);
        size = gfunc_arg_size(&sv->type);

        if (using_regs(size))
            continue; /* arguments smaller than 8 bytes passed in registers or on stack */

        if (bt == VT_STRUCT) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            /* generate structure store */
            r = get_reg(RC_INT);
            gen_offs_sp(0x8d, r, struct_size);
            struct_size += size;

            /* generate memcpy call */
            vset(&sv->type, r | VT_LVAL, 0);
            vpushv(sv);
            vstore();
            --vtop;
        } else if (bt == VT_LDOUBLE) {
            gv(RC_ST0);
            gen_offs_sp(0xdb, 0x107, struct_size);
            struct_size += 16;
        }
    }

    if (func_scratch < struct_size)
        func_scratch = struct_size;

    arg = nb_args;
    struct_size = args_size;

    for(i = 0; i < nb_args; i++) {
        --arg;
        bt = (vtop->type.t & VT_BTYPE);

        size = gfunc_arg_size(&vtop->type);
        if (!using_regs(size)) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            if (arg >= REGN) {
                d = get_reg(RC_INT);
                gen_offs_sp(0x8d, d, struct_size);
                gen_offs_sp(0x89, d, arg*8);
            } else {
                d = arg_prepare_reg(arg);
                gen_offs_sp(0x8d, d, struct_size);
            }
            struct_size += size;
        } else {
            if (is_sse_float(vtop->type.t)) {
                if (tcc_state->nosse)
                    tcc_error("SSE disabled");
                if (arg >= REGN) {
                    gv(RC_XMM0);
                    /* movq %xmm0, j*8(%rsp) */
                    gen_offs_sp(0xd60f66, 0x100, arg*8);
                } else {
                    /* Load directly to xmmN register */
                    gv(RC_XMM0 << arg);
                    d = arg_prepare_reg(arg);
                    /* mov %xmmN, %rxx */
                    o(0x66);
                    orex(1,d,0, 0x7e0f);
                    o(0xc0 + arg*8 + REG_VALUE(d));
                }
            } else {
                if (bt == VT_STRUCT) {
                    vtop->type.ref = NULL;
                    vtop->type.t = size > 4 ? VT_LLONG : size > 2 ? VT_INT
                        : size > 1 ? VT_SHORT : VT_BYTE;
                }

                r = gv(RC_INT);
                if (arg >= REGN) {
                    gen_offs_sp(0x89, r, arg*8);
                } else {
                    d = arg_prepare_reg(arg);
                    orex(1,d,r,0x89); /* mov */
                    o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
                }
            }
        }
        vtop--;
    }
    save_regs(0);
    /* Copy R10 and R11 into RCX and RDX, respectively */
    if (nb_args > 0) {
        o(0xd1894c); /* mov %r10, %rcx */
        if (nb_args > 1) {
            o(0xda894c); /* mov %r11, %rdx */
        }
    }

    gcall_or_jmp(0);

    if ((vtop->r & VT_SYM) && vtop->sym->v == TOK_alloca) {
        /* need to add the "func_scratch" area after alloca */
        o(0x48); func_alloca = oad(0x05, func_alloca); /* add $NN, %rax */
#ifdef CONFIG_TCC_BCHECK
        if (tcc_state->do_bounds_check)
            gen_bounds_call(TOK___bound_alloca_nr); /* new region */
#endif
    }
    vtop--;
}


#define FUNC_PROLOG_SIZE 11

/* generate function prolog */
void gfunc_prolog(Sym *func_sym)
{
    CType *func_type = &func_sym->type;
    int addr, reg_param_index, bt, size;
    Sym *sym;
    CType *type;

    func_ret_sub = 0;
    func_scratch = 32;
    func_alloca = 0;
    loc = 0;

    addr = PTR_SIZE * 2;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    reg_param_index = 0;

    sym = func_type->ref;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    size = gfunc_arg_size(&func_vt);
    if (!using_regs(size)) {
        gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        func_vc = addr;
        reg_param_index++;
        addr += 8;
    }

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        bt = type->t & VT_BTYPE;
        size = gfunc_arg_size(type);
        if (!using_regs(size)) {
            if (reg_param_index < REGN) {
                gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            }
            sym_push(sym->v & ~SYM_FIELD, type,
                     VT_LLOCAL | VT_LVAL, addr);
        } else {
            if (reg_param_index < REGN) {
                /* save arguments passed by register */
                if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) {
                    if (tcc_state->nosse)
                        tcc_error("SSE disabled");
                    o(0xd60f66); /* movq */
                    gen_modrm(reg_param_index, VT_LOCAL, NULL, addr);
                } else {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
                }
            }
            sym_push(sym->v & ~SYM_FIELD, type,
                     VT_LOCAL | VT_LVAL, addr);
        }
        addr += 8;
        reg_param_index++;
    }

    while (reg_param_index < REGN) {
        if (func_var) {
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            addr += 8;
        }
        reg_param_index++;
    }
#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check)
        gen_bounds_prolog();
#endif
}

/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

    /* align local size to word & save local variables */
    func_scratch = (func_scratch + 15) & -16;
    loc = (loc & -16) - func_scratch;

#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check)
        gen_bounds_epilog();
#endif

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }

    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    v = -loc;

    if (v >= 4096) {
        Sym *sym = external_global_sym(TOK___chkstk, &func_old_type);
        oad(0xb8, v); /* mov stacksize, %eax */
        oad(0xe8, 0); /* call __chkstk, (does the stackframe too) */
        greloca(cur_text_section, sym, ind-4, R_X86_64_PC32, -4);
        o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
    } else {
        o(0xe5894855);  /* push %rbp, mov %rsp, %rbp */
        o(0xec8148);  /* sub rsp, stacksize */
        gen_le32(v);
    }

    /* add the "func_scratch" area after each alloca seen */
    gsym_addr(func_alloca, -func_scratch);

    cur_text_section->data_offset = saved_ind;
    pe_add_unwind_data(ind, saved_ind, v);
    ind = cur_text_section->data_offset;
}

#else

static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc48348);
        g(val);
    } else {
        oad(0xc48148, val); /* add $xxx, %rsp */
    }
}

typedef enum X86_64_Mode {
    x86_64_mode_none,
    x86_64_mode_memory,
    x86_64_mode_integer,
    x86_64_mode_sse,
    x86_64_mode_x87
} X86_64_Mode;

static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b)
{
    if (a == b)
        return a;
    else if (a == x86_64_mode_none)
        return b;
    else if (b == x86_64_mode_none)
        return a;
    else if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory))
        return x86_64_mode_memory;
    else if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer))
        return x86_64_mode_integer;
    else if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87))
        return x86_64_mode_memory;
    else
        return x86_64_mode_sse;
}
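
/* Example: struct { double d; long l; } merges sse with integer and
   classifies as integer, so the 16 byte struct travels in two general
   purpose registers (VT_QLONG below).  Note that this merges across
   the whole struct, where the SysV ABI classifies each eightbyte
   separately; tcc's simplification can differ from gcc/clang for
   mixed structs like this one. */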

static X86_64_Mode classify_x86_64_inner(CType *ty)
{
    X86_64_Mode mode;
    Sym *f;

    switch (ty->t & VT_BTYPE) {
    case VT_VOID: return x86_64_mode_none;

    case VT_INT:
    case VT_BYTE:
    case VT_SHORT:
    case VT_LLONG:
    case VT_BOOL:
    case VT_PTR:
    case VT_FUNC:
        return x86_64_mode_integer;

    case VT_FLOAT:
    case VT_DOUBLE: return x86_64_mode_sse;

    case VT_LDOUBLE: return x86_64_mode_x87;

    case VT_STRUCT:
        f = ty->ref;

        mode = x86_64_mode_none;
        for (f = f->next; f; f = f->next)
            mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type));

        return mode;
    }
    assert(0);
    return 0;
}

static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, int *reg_count)
{
    X86_64_Mode mode;
    int size, align, ret_t = 0;

    if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
        *psize = 8;
        *palign = 8;
        *reg_count = 1;
        ret_t = ty->t;
        mode = x86_64_mode_integer;
    } else {
        size = type_size(ty, &align);
        *psize = (size + 7) & ~7;
        *palign = (align + 7) & ~7;

        if (size > 16) {
            mode = x86_64_mode_memory;
        } else {
            mode = classify_x86_64_inner(ty);
            switch (mode) {
            case x86_64_mode_integer:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QLONG;
                } else {
                    *reg_count = 1;
                    if (size > 4)
                        ret_t = VT_LLONG;
                    else if (size > 2)
                        ret_t = VT_INT;
                    else if (size > 1)
                        ret_t = VT_SHORT;
                    else
                        ret_t = VT_BYTE;
                    if ((ty->t & VT_BTYPE) == VT_STRUCT || (ty->t & VT_UNSIGNED))
                        ret_t |= VT_UNSIGNED;
                }
                break;

            case x86_64_mode_x87:
                *reg_count = 1;
                ret_t = VT_LDOUBLE;
                break;

            case x86_64_mode_sse:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QFLOAT;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
                }
                break;
            default: break; /* nothing to be done for x86_64_mode_memory and x86_64_mode_none */
            }
        }
    }

    if (ret) {
        ret->ref = NULL;
        ret->t = ret_t;
    }

    return mode;
}

ST_FUNC int classify_x86_64_va_arg(CType *ty)
{
    /* This definition must be synced with stdarg.h */
    enum __va_arg_type {
        __va_gen_reg, __va_float_reg, __va_stack
    };
    int size, align, reg_count;
    X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, &reg_count);
    switch (mode) {
    default: return __va_stack;
    case x86_64_mode_integer: return __va_gen_reg;
    case x86_64_mode_sse: return __va_float_reg;
    }
}

/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize)
{
    int size, align, reg_count;
    *ret_align = 1; // Never have to re-align return values for x86-64
    *regsize = 8;
    return (classify_x86_64_arg(vt, ret, &size, &align, &reg_count) != x86_64_mode_memory);
}

#define REGN 6
static const uint8_t arg_regs[REGN] = {
    TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
};

static int arg_prepare_reg(int idx) {
    if (idx == 2 || idx == 3)
        /* idx=2: r10, idx=3: r11 */
        return idx + 8;
    else
        return arg_regs[idx];
}
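
/* Mapping: idx 0 -> %rdi, 1 -> %rsi, 2 -> %r10, 3 -> %r11, 4 -> %r8,
   5 -> %r9.  %r10 and %r11 temporarily hold what belongs in %rdx and
   %rcx; gfunc_call() moves them into place just before the call, once
   gv() can no longer clobber them. */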

/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */
void gfunc_call(int nb_args)
{
    X86_64_Mode mode;
    CType type;
    int size, align, r, args_size, stack_adjust, i, reg_count;
    int nb_reg_args = 0;
    int nb_sse_args = 0;
    int sse_reg, gen_reg;
    char _onstack[nb_args ? nb_args : 1], *onstack = _onstack;

#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check)
        gbound_args(nb_args);
#endif

    /* calculate the number of integer/float register arguments, remember
       arguments to be passed via stack (in onstack[]), and also remember
       if we have to align the stack pointer to 16 (onstack[i] == 2).  Needs
       to be done in a left-to-right pass over arguments.  */
    stack_adjust = 0;
    for(i = nb_args - 1; i >= 0; i--) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
        if (mode == x86_64_mode_sse && nb_sse_args + reg_count <= 8) {
            nb_sse_args += reg_count;
            onstack[i] = 0;
        } else if (mode == x86_64_mode_integer && nb_reg_args + reg_count <= REGN) {
            nb_reg_args += reg_count;
            onstack[i] = 0;
        } else if (mode == x86_64_mode_none) {
            onstack[i] = 0;
        } else {
            if (align == 16 && (stack_adjust &= 15)) {
                onstack[i] = 2;
                stack_adjust = 0;
            } else
                onstack[i] = 1;
            stack_adjust += size;
        }
    }

    if (nb_sse_args && tcc_state->nosse)
        tcc_error("SSE disabled but floating point arguments passed");

    /* fetch cpu flag before generating any code */
    if ((vtop->r & VT_VALMASK) == VT_CMP)
        gv(RC_INT);

    /* for struct arguments we need to call memcpy, and that call
       would clobber the argument registers we are preparing.  So we
       first process the arguments that will be passed on the stack. */
    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;
    args_size = 0;
    stack_adjust &= 15;
    for (i = 0; i < nb_args;) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
        if (!onstack[i]) {
            ++i;
            continue;
        }
        /* Possibly adjust stack to align SSE boundary.  We're processing
           args from right to left while allocating happens left to right
           (stack grows down), so the adjustment needs to happen _after_
           an argument that requires it.  */
        if (stack_adjust) {
            o(0x50); /* push %rax; aka sub $8,%rsp */
            args_size += 8;
            stack_adjust = 0;
        }
        if (onstack[i] == 2)
            stack_adjust = 1;

        vrotb(i+1);

        switch (vtop->type.t & VT_BTYPE) {
            case VT_STRUCT:
                /* allocate the necessary size on stack */
                o(0x48);
                oad(0xec81, size); /* sub $xxx, %rsp */
                /* generate structure store */
                r = get_reg(RC_INT);
                orex(1, r, 0, 0x89); /* mov %rsp, r */
                o(0xe0 + REG_VALUE(r));
                vset(&vtop->type, r | VT_LVAL, 0);
                vswap();
                vstore();
                break;

            case VT_LDOUBLE:
                gv(RC_ST0);
                oad(0xec8148, size); /* sub $xxx, %rsp */
                o(0x7cdb); /* fstpt 0(%rsp) */
                g(0x24);
                g(0x00);
                break;

            case VT_FLOAT:
            case VT_DOUBLE:
                assert(mode == x86_64_mode_sse);
                r = gv(RC_FLOAT);
                o(0x50); /* push $rax */
                /* movq %xmmN, (%rsp) */
                o(0xd60f66);
                o(0x04 + REG_VALUE(r)*8);
                o(0x24);
                break;

            default:
                assert(mode == x86_64_mode_integer);
                /* simple type */
                /* XXX: implicit cast ? */
                r = gv(RC_INT);
                orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
                break;
        }
        args_size += size;

        vpop();
        --nb_args;
        onstack++;
    }

    /* XXX This should be superfluous.  */
    save_regs(0); /* save used temporary registers */

    /* then, we prepare register passing arguments.
       Note that we cannot set RDX and RCX in this loop because gv()
       may break these temporary registers. Let's use R10 and R11
       instead of them */
    assert(gen_reg <= REGN);
    assert(sse_reg <= 8);
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, &reg_count);
        /* Alter stack entry type so that gv() knows how to treat it */
        vtop->type = type;
        if (mode == x86_64_mode_sse) {
            if (reg_count == 2) {
                sse_reg -= 2;
                gv(RC_FRET); /* Use pair load into xmm0 & xmm1 */
                if (sse_reg) { /* avoid redundant movaps %xmm0, %xmm0 */
                    /* movaps %xmm1, %xmmN */
                    o(0x280f);
                    o(0xc1 + ((sse_reg+1) << 3));
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (sse_reg << 3));
                }
            } else {
                assert(reg_count == 1);
                --sse_reg;
                /* Load directly to register */
                gv(RC_XMM0 << sse_reg);
            }
        } else if (mode == x86_64_mode_integer) {
            /* simple type */
            /* XXX: implicit cast ? */
            int d;
            gen_reg -= reg_count;
            r = gv(RC_INT);
            d = arg_prepare_reg(gen_reg);
            orex(1,d,r,0x89); /* mov */
            o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
            if (reg_count == 2) {
                d = arg_prepare_reg(gen_reg+1);
                orex(1,d,vtop->r2,0x89); /* mov */
                o(0xc0 + REG_VALUE(vtop->r2) * 8 + REG_VALUE(d));
            }
        }
        vtop--;
    }
    assert(gen_reg == 0);
    assert(sse_reg == 0);

    /* We shouldn't have many operands on the stack anymore, but the
       call address itself is still there, and it might be in %eax
       (or edx/ecx) currently, which the below writes would clobber.
       So evict all remaining operands here.  */
    save_regs(0);

    /* Copy R10 and R11 into RDX and RCX, respectively */
    if (nb_reg_args > 2) {
        o(0xd2894c); /* mov %r10, %rdx */
        if (nb_reg_args > 3) {
            o(0xd9894c); /* mov %r11, %rcx */
        }
    }

    if (vtop->type.ref->f.func_type != FUNC_NEW) /* implies FUNC_OLD or FUNC_ELLIPSIS */
        oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
    gcall_or_jmp(0);
    if (args_size)
        gadd_sp(args_size);
    vtop--;
}

#define FUNC_PROLOG_SIZE 11

static void push_arg_reg(int i) {
    loc -= 8;
    gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
}

/* generate function prolog */
void gfunc_prolog(Sym *func_sym)
{
    CType *func_type = &func_sym->type;
    X86_64_Mode mode;
    int i, addr, align, size, reg_count;
    int param_addr = 0, reg_param_index, sse_param_index;
    Sym *sym;
    CType *type;

    sym = func_type->ref;
    addr = PTR_SIZE * 2;
    loc = 0;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    func_ret_sub = 0;

    if (func_var) {
        int seen_reg_num, seen_sse_num, seen_stack_size;
        seen_reg_num = seen_sse_num = 0;
        /* frame pointer and return address */
        seen_stack_size = PTR_SIZE * 2;
        /* count the number of seen parameters */
        sym = func_type->ref;
        while ((sym = sym->next) != NULL) {
            type = &sym->type;
            mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
            switch (mode) {
            default:
            stack_arg:
                seen_stack_size = ((seen_stack_size + align - 1) & -align) + size;
                break;

            case x86_64_mode_integer:
                if (seen_reg_num + reg_count > REGN)
                    goto stack_arg;
                seen_reg_num += reg_count;
                break;

            case x86_64_mode_sse:
                if (seen_sse_num + reg_count > 8)
                    goto stack_arg;
                seen_sse_num += reg_count;
                break;
            }
        }

        loc -= 24;
        /* movl $0x????????, -0x18(%rbp) */
        o(0xe845c7);
        gen_le32(seen_reg_num * 8);
        /* movl $0x????????, -0x14(%rbp) */
        o(0xec45c7);
        gen_le32(seen_sse_num * 16 + 48);
        /* leaq $0x????????, %r11 */
        o(0x9d8d4c);
        gen_le32(seen_stack_size);
        /* movq %r11, -0x10(%rbp) */
        o(0xf05d894c);
        /* leaq $-192(%rbp), %r11 */
        o(0x9d8d4c);
        gen_le32(-176 - 24);
        /* movq %r11, -0x8(%rbp) */
        o(0xf85d894c);

        /* save all register passing arguments */
        for (i = 0; i < 8; i++) {
            loc -= 16;
            if (!tcc_state->nosse) {
                o(0xd60f66); /* movq */
                gen_modrm(7 - i, VT_LOCAL, NULL, loc);
            }
            /* movq $0, loc+8(%rbp) */
            o(0x85c748);
            gen_le32(loc + 8);
            gen_le32(0);
        }
        for (i = 0; i < REGN; i++) {
            push_arg_reg(REGN-1-i);
        }
    }

    sym = func_type->ref;
    reg_param_index = 0;
    sse_param_index = 0;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, &reg_count);
    if (mode == x86_64_mode_memory) {
        push_arg_reg(reg_param_index);
        func_vc = loc;
        reg_param_index++;
    }
    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
        switch (mode) {
        case x86_64_mode_sse:
            if (tcc_state->nosse)
                tcc_error("SSE disabled but floating point arguments used");
            if (sse_param_index + reg_count <= 8) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    o(0xd60f66); /* movq */
                    gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8);
                    ++sse_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
            }
            break;

        case x86_64_mode_memory:
        case x86_64_mode_x87:
            addr = (addr + align - 1) & -align;
            param_addr = addr;
            addr += size;
            break;

        case x86_64_mode_integer: {
            if (reg_param_index + reg_count <= REGN) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8);
                    ++reg_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
            }
            break;
        }
        default: break; /* nothing to be done for x86_64_mode_none */
        }
        sym_push(sym->v & ~SYM_FIELD, type,
                 VT_LOCAL | VT_LVAL, param_addr);
    }

#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check)
        gen_bounds_prolog();
#endif
}

/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check)
        gen_bounds_epilog();
#endif
    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }
    /* align local size to word & save local variables */
    v = (-loc + 15) & -16;
    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    o(0xe5894855);  /* push %rbp, mov %rsp, %rbp */
    o(0xec8148);  /* sub rsp, stacksize */
    gen_le32(v);
    ind = saved_ind;
}

#endif /* not PE */

ST_FUNC void gen_fill_nops(int bytes)
{
    while (bytes--)
        g(0x90);
}
1662 
1663 /* generate a jump to a label */
gjmp(int t)1664 int gjmp(int t)
1665 {
1666     return gjmp2(0xe9, t);
1667 }
1668 
1669 /* generate a jump to a fixed address */
gjmp_addr(int a)1670 void gjmp_addr(int a)
1671 {
1672     int r;
1673     r = a - ind - 2;
1674     if (r == (char)r) {
1675         g(0xeb);
1676         g(r);
1677     } else {
1678         oad(0xe9, a - ind - 5);
1679     }
1680 }

ST_FUNC int gjmp_append(int n, int t)
{
    void *p;
    /* append the jump list 'n' to the jump list 't' */
    if (n) {
        uint32_t n1 = n, n2;
        while ((n2 = read32le(p = cur_text_section->data + n1)))
            n1 = n2;
        write32le(p, t);
        t = n;
    }
    return t;
}
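
/* Unresolved forward jumps are threaded through their own 32-bit
   displacement fields: each pending jump's operand holds the code
   offset of the next pending jump, and 0 terminates the chain.
   gsym_addr() later walks that chain and patches every displacement
   once the target is known.  Sketch of two chained jumps:

       e9 <offset of B>        jmp ...   (A)
       ...
       e9 00 00 00 00          jmp ...   (B, end of chain)
*/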

ST_FUNC int gjmp_cond(int op, int t)
{
    if (op & 0x100) {
        /* This was a float compare.  If the parity flag is set
           the result was unordered.  For anything except != this
           means false and we don't jump (anding both conditions).
           For != this means true (oring both).
           Take care about inverting the test.  We need to jump
           to our target if the result was unordered and test wasn't NE,
           otherwise if unordered we don't want to jump.  */
        int v = vtop->cmp_r;
        op &= ~0x100;
        if (op ^ v ^ (v != TOK_NE))
            o(0x067a);  /* jp +6 */
        else {
            g(0x0f);
            t = gjmp2(0x8a, t); /* jp t */
        }
    }
    g(0x0f);
    t = gjmp2(op - 16, t); /* jcc: setcc opcode 0x90+cc -> jcc 0x80+cc */
    return t;
}
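
/* Example: after a double compare, "a < b" must not be taken when the
   result is unordered (NaN sets PF), so a parity jump skips the real
   conditional jump:

       7a 06                   jp  +6      (skip the 6-byte jb below)
       0f 82 xx xx xx xx       jb  target

   while "a != b" must also be taken when unordered:

       0f 8a xx xx xx xx       jp  target
       0f 85 xx xx xx xx       jne target
*/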

/* generate an integer binary operation */
void gen_opi(int op)
{
    int r, fr, opc, c;
    int ll, uu, cc;

    ll = is64_type(vtop[-1].type.t);
    uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
    cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;

    switch(op) {
    case '+':
    case TOK_ADDC1: /* add with carry generation */
        opc = 0;
    gen_op8:
        if (cc && (!ll || (int)vtop->c.i == vtop->c.i)) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            c = vtop->c.i;
            if (c == (char)c) {
                /* XXX: generate inc and dec for smaller code ? */
                orex(ll, r, 0, 0x83);
                o(0xc0 | (opc << 3) | REG_VALUE(r));
                g(c);
            } else {
                orex(ll, r, 0, 0x81);
                oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
            }
        } else {
            gv2(RC_INT, RC_INT);
            r = vtop[-1].r;
            fr = vtop[0].r;
            orex(ll, r, fr, (opc << 3) | 0x01);
            o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
        }
        vtop--;
        if (op >= TOK_ULT && op <= TOK_GT)
            vset_VT_CMP(op);
        break;
    case '-':
    case TOK_SUBC1: /* sub with carry generation */
        opc = 5;
        goto gen_op8;
    case TOK_ADDC2: /* add with carry use */
        opc = 2;
        goto gen_op8;
    case TOK_SUBC2: /* sub with carry use */
        opc = 3;
        goto gen_op8;
    case '&':
        opc = 4;
        goto gen_op8;
    case '^':
        opc = 6;
        goto gen_op8;
    case '|':
        opc = 1;
        goto gen_op8;
    case '*':
        gv2(RC_INT, RC_INT);
        r = vtop[-1].r;
        fr = vtop[0].r;
        orex(ll, fr, r, 0xaf0f); /* imul fr, r */
        o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
        vtop--;
        break;
    case TOK_SHL:
        opc = 4;
        goto gen_shift;
    case TOK_SHR:
        opc = 5;
        goto gen_shift;
    case TOK_SAR:
        opc = 7;
    gen_shift:
        opc = 0xc0 | (opc << 3);
        if (cc) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
            o(opc | REG_VALUE(r));
            g(vtop->c.i & (ll ? 63 : 31));
        } else {
            /* we generate the shift in ecx */
            gv2(RC_INT, RC_RCX);
            r = vtop[-1].r;
            orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
            o(opc | REG_VALUE(r));
        }
        vtop--;
        break;
    case TOK_UDIV:
    case TOK_UMOD:
        uu = 1;
        goto divmod;
    case '/':
    case '%':
    case TOK_PDIV:
        uu = 0;
    divmod:
        /* first operand must be in eax */
        /* XXX: need better constraint for second operand */
        gv2(RC_RAX, RC_RCX);
        r = vtop[-1].r;
        fr = vtop[0].r;
        vtop--;
        save_reg(TREG_RDX);
        orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
        orex(ll, fr, 0, 0xf7); /* div fr, %eax */
        o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
        if (op == '%' || op == TOK_UMOD)
            r = TREG_RDX;
        else
            r = TREG_RAX;
        vtop->r = r;
        break;
    default:
        opc = 7;
        goto gen_op8;
    }
}
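
/* Example encodings from the constant case above (illustrative):

       83 c0 05                add $5, %eax     (imm8 form, opcode 0x83)
       81 c0 00 01 00 00       add $256, %eax   (imm32 form, opcode 0x81)
       48 83 e8 05             sub $5, %rax     (REX.W added by orex)

   The 3-bit 'opc' goes into the reg field of the ModRM byte and
   selects the ALU operation: 0=add, 1=or, 2=adc, 3=sbb, 4=and, 5=sub,
   6=xor, 7=cmp. */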

void gen_opl(int op)
{
    gen_opi(op);
}
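
/* On x86-64 a long long fits in one register, so the 64-bit variants
   only differ by the REX.W prefix that orex() adds when 'll' is set. */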

/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type */
/* XXX: need to use ST1 too */
void gen_opf(int op)
{
    int a, ft, fc, swapped, r;
    int float_type =
        (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;

    /* convert constants to memory references */
    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        vswap();
        gv(float_type);
        vswap();
    }
    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
        gv(float_type);

    /* must put at least one value in the floating point register */
    if ((vtop[-1].r & VT_LVAL) &&
        (vtop[0].r & VT_LVAL)) {
        vswap();
        gv(float_type);
        vswap();
    }
    swapped = 0;
    /* swap the stack if needed so that t1 is the register and t2 is
       the memory reference */
    if (vtop[-1].r & VT_LVAL) {
        vswap();
        swapped = 1;
    }
    if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* load the second operand on the x87 stack */
            load(TREG_ST0, vtop);
            save_reg(TREG_RAX); /* eax is used by FP comparison code */
            if (op == TOK_GE || op == TOK_GT)
                swapped = !swapped;
            else if (op == TOK_EQ || op == TOK_NE)
                swapped = 0;
            if (swapped)
                o(0xc9d9); /* fxch %st(1) */
            if (op == TOK_EQ || op == TOK_NE)
                o(0xe9da); /* fucompp */
            else
                o(0xd9de); /* fcompp */
            o(0xe0df); /* fnstsw %ax */
            if (op == TOK_EQ) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40fc80); /* cmp $0x40, %ah */
            } else if (op == TOK_NE) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40f480); /* xor $0x40, %ah */
                op = TOK_NE;
            } else if (op == TOK_GE || op == TOK_LE) {
                o(0x05c4f6); /* test $0x05, %ah */
                op = TOK_EQ;
            } else {
                o(0x45c4f6); /* test $0x45, %ah */
                op = TOK_EQ;
            }
            vtop--;
            vset_VT_CMP(op);
        } else {
            /* no memory reference possible for long double operations */
            load(TREG_ST0, vtop);
            swapped = !swapped;

            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                if (swapped)
                    a++;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                if (swapped)
                    a++;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.i;
            o(0xde); /* fxxxp %st, %st(1) */
            o(0xc1 + (a << 3));
            vtop--;
        }
    } else {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* if saved lvalue, then we must reload it */
            r = vtop->r;
            fc = vtop->c.i;
            if ((r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.i = fc;
                load(r, &v1);
                fc = 0;
                vtop->r = r = r | VT_LVAL;
            }

            if (op == TOK_EQ || op == TOK_NE) {
                swapped = 0;
            } else {
                if (op == TOK_LE || op == TOK_LT)
                    swapped = !swapped;
                if (op == TOK_LE || op == TOK_GE) {
                    op = 0x93; /* setae */
                } else {
                    op = 0x97; /* seta */
                }
            }

            if (swapped) {
                gv(RC_FLOAT);
                vswap();
            }
            assert(!(vtop[-1].r & VT_LVAL));

            if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
                o(0x66);
            if (op == TOK_EQ || op == TOK_NE)
                o(0x2e0f); /* ucomisd */
            else
                o(0x2f0f); /* comisd */

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
            vset_VT_CMP(op | 0x100);
            vtop->cmp_r = op;
        } else {
            assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.i;
            assert((ft & VT_BTYPE) != VT_LDOUBLE);

            r = vtop->r;
            /* if saved lvalue, then we must reload it */
            if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.i = fc;
                load(r, &v1);
                fc = 0;
                vtop->r = r = r | VT_LVAL;
            }

            assert(!(vtop[-1].r & VT_LVAL));
            if (swapped) {
                assert(vtop->r & VT_LVAL);
                gv(RC_FLOAT);
                vswap();
            }

            if ((ft & VT_BTYPE) == VT_DOUBLE) {
                o(0xf2);
            } else {
                o(0xf3);
            }
            o(0x0f);
            o(0x58 + a); /* addsd/mulsd/subsd/divsd (or the ss forms) */

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
        }
    }
}
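
/* Example: "a >= b" on doubles in %xmm0/%xmm1 emits roughly

       66 0f 2f c1             comisd %xmm1, %xmm0

   and records setae (0x93) via vset_VT_CMP, since CF=0 here means
   "above or equal".  "<" and "<=" swap the operands first so that
   only seta/setae are ever needed; == and != use ucomisd, which does
   not raise the invalid-operation exception on quiet NaNs. */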

/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases. */
void gen_cvt_itof(int t)
{
    if ((t & VT_BTYPE) == VT_LDOUBLE) {
        save_reg(TREG_ST0);
        gv(RC_INT);
        if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* signed long long to float/double/long double (unsigned case
               is handled generically) */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
                   (VT_INT | VT_UNSIGNED)) {
            /* unsigned int to float/double/long double */
            o(0x6a); /* push $0 */
            g(0x00);
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x10c48348); /* add $16, %rsp */
        } else {
            /* int to float/double/long double */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x2404db); /* fildl (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        }
        vtop->r = TREG_ST0;
    } else {
        int r = get_reg(RC_FLOAT);
        gv(RC_INT);
        o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT ? 1 : 0)); /* 0xf2 = sd, 0xf3 = ss */
        if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
            (VT_INT | VT_UNSIGNED) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
            o(0x48); /* REX.W: 64-bit source operand */
        }
        o(0x2a0f);
        o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
        vtop->r = r;
    }
}
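
/* Example: (double)u with an unsigned int in %eax takes the 64-bit
   form, relying on the fact that 32-bit operations have already
   zero-extended the value into %rax:

       f2 48 0f 2a c0          cvtsi2sd %rax, %xmm0

   Long double targets go through the x87 unit (fildl/fildll) instead,
   since SSE has no 80-bit type. */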

/* convert from one floating point type to another */
void gen_cvt_ftof(int t)
{
    int ft, bt, tbt;

    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    tbt = t & VT_BTYPE;

    if (bt == VT_FLOAT) {
        gv(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0x140f); /* unpcklps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f); /* cvtps2pd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(TREG_ST0);
            /* movss %xmm0,-0x10(%rsp) */
            o(0x110ff3);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444d9); /* flds -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else if (bt == VT_DOUBLE) {
        gv(RC_FLOAT);
        if (tbt == VT_FLOAT) {
            o(0x140f66); /* unpcklpd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f66); /* cvtpd2ps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            save_reg(TREG_ST0);
            /* movsd %xmm0,-0x10(%rsp) */
            o(0x110ff2);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444dd); /* fldl -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else {
        int r;
        gv(RC_ST0);
        r = get_reg(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0xf0245cdd); /* fstpl -0x10(%rsp) */
            /* movsd -0x10(%rsp),%xmm0 */
            o(0x100ff2);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        } else if (tbt == VT_FLOAT) {
            o(0xf0245cd9); /* fstps -0x10(%rsp) */
            /* movss -0x10(%rsp),%xmm0 */
            o(0x100ff3);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        }
    }
}
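
/* The SSE <-> x87 moves above bounce through -0x10(%rsp) without
   adjusting %rsp; this is valid because the System V ABI guarantees a
   128-byte red zone below the stack pointer.  E.g. float -> long
   double:

       f3 0f 11 44 24 f0       movss %xmm0, -0x10(%rsp)
       d9 44 24 f0             flds  -0x10(%rsp)
*/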

/* convert fp to int 't' type */
void gen_cvt_ftoi(int t)
{
    int ft, bt, size, r;
    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    if (bt == VT_LDOUBLE) {
        gen_cvt_ftof(VT_DOUBLE);
        bt = VT_DOUBLE;
    }

    gv(RC_FLOAT);
    if (t != VT_INT)
        size = 8;
    else
        size = 4;

    r = get_reg(RC_INT);
    if (bt == VT_FLOAT) {
        o(0xf3);
    } else if (bt == VT_DOUBLE) {
        o(0xf2);
    } else {
        assert(0);
    }
    orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
    o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
    vtop->r = r;
}
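
/* cvttss2si/cvttsd2si truncate toward zero, which is exactly C's
   float-to-int conversion.  E.g. (int)d with d in %xmm0:

       f2 0f 2c c0             cvttsd2si %xmm0, %eax

   Out-of-range inputs yield the "integer indefinite" value
   (0x80000000 for 32 bits), which C leaves undefined anyway. */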

/* generate sign extension from 32 to 64 bits */
ST_FUNC void gen_cvt_sxtw(void)
{
    int r = gv(RC_INT);
    /* x86_64 specific: movslq */
    o(0x6348);
    o(0xc0 + (REG_VALUE(r) << 3) + REG_VALUE(r));
}
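
/* E.g. for %eax: 48 63 c0, movslq %eax, %rax. */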

/* char/short to int conversion */
ST_FUNC void gen_cvt_csti(int t)
{
    int r, sz, xl, ll;
    r = gv(RC_INT);
    sz = !(t & VT_UNSIGNED);
    xl = (t & VT_BTYPE) == VT_SHORT;
    ll = (vtop->type.t & VT_BTYPE) == VT_LLONG;
    orex(ll, r, 0, 0xc0b60f /* mov[sz] %a[xl], %eax */
        | (sz << 3 | xl) << 8
        | (REG_VALUE(r) << 3 | REG_VALUE(r)) << 16
        );
}
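
/* The base opcode 0f b6 is movzbl; bit 3 of the second byte turns it
   into the sign-extending form (0f be, movsbl) and bit 0 selects a
   16-bit source (0f b7/0f bf, movzwl/movswl).  E.g. truncating %eax
   to signed char:

       0f be c0                movsbl %al, %eax
*/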

/* computed goto support */
void ggoto(void)
{
    gcall_or_jmp(1);
    vtop--;
}

/* Save the stack pointer onto the stack and return the location of its address */
ST_FUNC void gen_vla_sp_save(int addr) {
    /* mov %rsp, addr(%rbp) */
    gen_modrm64(0x89, TREG_RSP, VT_LOCAL, NULL, addr);
}

/* Restore the SP from a location on the stack */
ST_FUNC void gen_vla_sp_restore(int addr) {
    /* mov addr(%rbp), %rsp */
    gen_modrm64(0x8b, TREG_RSP, VT_LOCAL, NULL, addr);
}

#ifdef TCC_TARGET_PE
/* Save result of gen_vla_alloc onto the stack */
ST_FUNC void gen_vla_result(int addr) {
    /* mov %rax, addr(%rbp) */
    gen_modrm64(0x89, TREG_RAX, VT_LOCAL, NULL, addr);
}
#endif

/* Subtract from the stack pointer, and push the resulting value onto the stack */
ST_FUNC void gen_vla_alloc(CType *type, int align) {
    int use_call = 0;

#if defined(CONFIG_TCC_BCHECK)
    use_call = tcc_state->do_bounds_check;
#endif
#ifdef TCC_TARGET_PE /* alloca does more than just adjust %rsp on Windows */
    use_call = 1;
#endif
    if (use_call) {
        vpush_global_sym(&func_old_type, TOK_alloca);
        vswap(); /* Move alloca ref past allocation size */
        gfunc_call(1);
    } else {
        int r;
        r = gv(RC_INT); /* allocation size */
        /* sub r, %rsp */
        o(0x2b48);
        o(0xe0 | REG_VALUE(r));
        /* we always align to 16 bytes rather than to 'align' */
        /* and $-16, %rsp */
        o(0xf0e48348);
        vpop();
    }
}
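
/* Example: "char buf[n];" with the size in %rcx emits roughly

       48 2b e1                sub %rcx, %rsp
       48 83 e4 f0             and $-16, %rsp

   which keeps %rsp 16-byte aligned as the ABI requires.  With bounds
   checking, or on PE targets, the runtime alloca() is called instead
   because plain pointer arithmetic is not sufficient there. */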


/* end of x86-64 code generator */
/*************************************************************/
#endif /* ! TARGET_DEFS_ONLY */
/******************************************************/