1: let dbg = ref true 2: and inch = ref stdin 3: 4: type tok = 5: Op of string 6: ILit of int 7: SLit of int * string 8: Sym of int 9: 10: let bufferize f = 11: let buf = ref None in 12: (fun () -> 13: match !buf with 14: Some x -> buf := None; x 15: None -> f ()), 16: (fun x -> 17: assert (!buf = None); 18: buf := Some x) 19: 20: let getch, ungetch = 21: bufferize (fun () -> input_char !inch) 22: 23: let peekch () = 24: let ch = getch () in 25: ungetch ch; ch 26: 27: let addsym, symstr, symitr = 28: let symtab = Array.make 100 "" and syms = ref 0 in 29: let rec find s n = 30: if n >= !syms then (incr syms; n) else 31: if symtab.(n) = s then n else 32: find s (n+1) in 33: 34: (fun s -> 35: let sid = find s 0 in 36: symtab.(sid) <- s; 37: sid), 38: 39: (fun n -> 40: assert (n < !syms); 41: symtab.(n)), 42: 43: (fun f -> 44: for i=0 to !syms-1 do 45: f i symtab.(i) 46: done) 47: 48: let glo = String.make 0x1000 '\x00' 49: and gpos = ref 0 50: 51: let base = 0x400000 52: and textoff = 0xe8 53: 54: let next = 55: let s = String.create 100 in 56: 57: let getq () = 58: match getch () with 59: '\\' when peekch () = 'n' -> 60: ignore (getch ()); '\n' 61: c -> c in 62: 63: let isid = function 64: 'a' .. 'z' 'A' .. 'Z' '_' -> true 65: _ -> false in 66: 67: let rec id n ch = 68: s.[n] <- ch; 69: if isid (peekch ()) 70: then id (n+1) (getch ()) 71: else Sym (addsym (String.sub s 0 (n+1))) in 72: 73: let rec ilit n = 74: match peekch () with 75: '0' .. '9' -> ilit (10*n + Char.code (getch ()) - 48) 76: _ -> ILit n in 77: 78: let rec slit b e = 79: match peekch () with 80: '"' -> 81: ignore (getch ()); 82: gpos := (e + 8) land -8; 83: SLit (b+textoff + base, String.sub glo b (e-b)) 84: _ -> 85: glo.[e] <- getq (); 86: slit b (e+1) in 87: 88: let longops = 89: [ "++"; "--"; "&&"; "||"; "=="; 90: "<="; ">="; "!="; ">>"; "<<" ] in 91: let rec op ch = function 92: lop :: l -> 93: if lop.[0] = ch && lop.[1] = peekch () 94: then (ignore (getch ()); Op lop) 95: else op ch l 96: [] -> Op (String.make 1 ch) in 97: 98: let cconst () = 99: let ch = getq () in 100: let qt = getch () in 101: if qt <> '\'' then failwith "syntax error" else 102: ILit (Char.code ch) in 103: 104: let rec skip () = 105: match getch () with 106: '\t' ' ' '\r' '\n' -> skip () 107: '/' when peekch () = '*' -> com (ignore (getch ())) 108: ch -> ch 109: and com () = 110: match getch () with 111: '*' when peekch () = '/' -> skip (ignore (getch ())) 112: _ -> com () in 113: 114: fun () -> (* next token *) 115: match 116: try Some (skip ()) 117: with End_of_file -> None 118: with 119: Some ('0' .. '9' as c) -> ilit (Char.code c - 48) 120: Some '"' -> slit !gpos !gpos 121: Some '\'' -> cconst () 122: Some c when isid c -> id 0 c 123: Some c -> op c longops 124: None -> Op "EOF!" 125: 126: let next, unnext = 127: bufferize next 128: 129: let nextis t = 130: let nt = next () in 131: unnext nt; t = nt 132: 133: let obuf = String.make 0x100000 '\x00' 134: and opos = ref 0 135: 136: let rec out x = 137: if x <> 0 then begin 138: out (x/0x100); 139: obuf.[!opos] <- Char.chr (x land 0xff); 140: incr opos 141: end 142: 143: let le n x = 144: for i = 0 to n/8 - 1 do 145: let byte = (x lsr (i*8)) land 0xff in 146: obuf.[!opos] <- Char.chr byte; 147: incr opos 148: done 149: 150: let get32 l = 151: Char.code obuf.[l] + 152: Char.code obuf.[l+1] * 0x100 + 153: Char.code obuf.[l+2] * 0x10000 + 154: Char.code obuf.[l+3] * 0x1000000 155: 156: let rec patch rel loc n = 157: assert (n < 0x100000000); 158: if loc <> 0 then begin 159: let i = !opos in 160: let loc' = get32 loc in 161: let x = if rel then n - (loc+4) else n in 162: if !dbg then 163: Printf.eprintf "patching at %d to %d (n=%d)\n" loc x n; 164: opos := loc; le 32 x; 165: patch rel loc' n; 166: opos := i 167: end 168: 169: let load r n = 170: out (0xb8+r); le 32 n 171: 172: let cmp n = 173: load 0 0; 174: out (0x0f92c0 + (n lsl 8)) 175: 176: let test n l = 177: out 0x4885c0; 178: out (0x0f84 + n); 179: let loc = !opos in 180: le 32 l; 181: loc 182: 183: let align = ref 0 184: let push r = 185: incr align; 186: if r<8 187: then out (0x50+r) 188: else out (0x4150+r-8) 189: and pop r = 190: decr align; 191: if r<8 192: then out (0x58+r) 193: else out (0x4158+r-8) 194: 195: type lvpatch = 196: Mov of int 197: Del of int 198: 199: type lvty = 200: Int 201: Chr 202: 203: let lval = ref (Mov 0, Int) 204: 205: let patchlval () = 206: match fst !lval with 207: Mov n -> obuf.[!opos - n] <- '\x8d' 208: Del n -> opos := !opos - n 209: 210: let read = function 211: Int -> 212: out 0x488b; le 8 0; 213: lval := (Del 3, Int) 214: Chr -> 215: out 0x480fb6; le 8 0; 216: lval := (Del 4, Chr) 217: 218: type globref = { loc : int; va : int } 219: 220: let globs = Array.make 100 { loc = 0; va = -1 } 221: 222: let lvls = [ 223: ("*", 0); ("/", 0); ("%", 0); 224: ("+", 1); ("-", 1); 225: ("<<", 2); (">>", 2); 226: ("<", 3); ("<=", 3); (">", 3); (">=", 3); 227: ("==", 4); ("!=", 4); 228: ("&", 5); 229: ("^", 6); 230: ("|", 7); 231: ("&&", 8); 232: ("||", 9); 233: ] 234: 235: type ins = 236: Bin of int list 237: Cmp of int 238: 239: let inss = [ 240: ("*", Bin [0x480fafc1]); 241: ("/", Bin [0x4891; 0x4899; 0x48f7f9]); 242: ("%", Bin [0x4891; 0x4899; 0x48f7f9; 0x4892]); 243: ("+", Bin [0x4801c8]); ("-", Bin [0x4891; 0x4829c8]); 244: ("<<", Bin [0x4891; 0x48d3e0]); (">>", Bin [0x4891; 0x48d3f8]); 245: ("<", Cmp 10); ("<=", Cmp 12); (">", Cmp 13); (">=", Cmp 11); 246: ("==", Cmp 2); ("!=", Cmp 3); 247: ("&", Bin [0x4821c8]); 248: ("^", Bin [0x4831c8]); 249: ("|", Bin [0x4809c8]) 250: ] 251: 252: let tokint = Sym (addsym "int") 253: and tokchar = Sym (addsym "char") 254: and tokret = Sym (addsym "return") 255: and tokif = Sym (addsym "if") 256: and tokelse = Sym (addsym "else") 257: and tokwhile = Sym (addsym "while") 258: and tokfor = Sym (addsym "for") 259: and tokbreak = Sym (addsym "break") 260: 261: let rec binary stk lvl = 262: if lvl = -1 then unary stk else 263: let lvlof o = 264: if not (List.mem_assoc o lvls) then -1 else 265: List.assoc o lvls in 266: let rec fold () = 267: match next () with 268: Op o when lvlof o = lvl -> 269: push 0; (* push %rax *) 270: binary stk (lvl-1); 271: pop 1; (* pop %rcx *) 272: begin match List.assoc o inss with 273: Bin ops -> List.iter out ops 274: Cmp c -> out 0x4839c1; cmp c 275: end; 276: fold () 277: t -> unnext t in 278: let rec foldtst loc = 279: match next () with 280: Op o when lvlof o = lvl -> 281: let loc' = test (lvl-8) loc in 282: binary stk (lvl-1); 283: foldtst loc' 284: t -> unnext t; loc in 285: binary stk (lvl-1); 286: if lvl < 8 then fold () else 287: let loc = foldtst 0 in 288: patch true loc !opos 289: 290: and unary stk = 291: match next () with 292: Sym i -> 293: if List.mem_assoc i stk then ( 294: let l = List.assoc i stk in 295: assert (l > -256); 296: out (0x488b45); 297: out (l land 255); (* mov l(%rbp), %rax *) 298: lval := (Mov 3, Int)) 299: else ( 300: out 0x48b8; 301: let g = globs.(i) 302: and loc = !opos in 303: le 64 g.loc; (* mov $g.loc, %rax *) 304: globs.(i) <- { g with loc }; 305: read Int); 306: postfix stk 307: SLit (l, _) -> 308: out 0x48b8; 309: le 64 l (* mov $l, %rax *) 310: ILit i -> load 0 i (* mov $i, %eax *) 311: Op "(" -> 312: expr stk; 313: ignore (next ()); (* XXX use expect here *) 314: postfix stk 315: Op "*" -> 316: let (ty, i) = 317: ignore (next ()); 318: match next () with 319: t when t = tokint -> 320: if next () = Op "*" 321: then (Int, 1) else (Int, 5) 322: t when t = tokchar -> (Chr, 2) 323: _ -> failwith "[cast] expected" in 324: for k=1 to i 325: do ignore (next ()) done; 326: unary stk; 327: read ty 328: Op "&" -> 329: unary stk; 330: patchlval () 331: Op o -> 332: let unops = 333: [ ("+", 0); 334: ("-", 0x48f7d8); 335: ("~", 0x48f7d0); 336: ("!", 0x4885c0) ] in 337: unary stk; 338: if not (List.mem_assoc o unops) then 339: failwith (Printf.sprintf "unknown operator %s" o); 340: out (List.assoc o unops); 341: if o = "!" then 342: cmp 2 (* setz %al *) 343: 344: and postfix stk = 345: match next () with 346: Op ("++" "--" as op) -> 347: let ol = [ 348: (("++", Int), 0x48ff01); (* incq (%rcx) *) 349: (("--", Int), 0x48ff09); (* decq (%rcx) *) 350: (("++", Chr), 0xfe01); (* incb (%rcx) *) 351: (("--", Chr), 0xfe09); (* decb (%rcx) *) 352: ] in 353: patchlval (); 354: out 0x4889c1; (* mov %rax, %rcx *) 355: read (snd !lval); 356: out (List.assoc (op, snd !lval) ol) 357: Op "(" -> 358: let regs = [ 7; 6; 2; 1; 8; 9 ] in 359: let rec emitargs l rl = 360: if nextis (Op ")") then ( 361: ignore (next ()); 362: List.iter pop l) 363: else ( 364: expr stk; 365: push 0; 366: if nextis (Op ",") then 367: ignore (next ()); 368: emitargs (List.hd rl :: l) (List.tl rl)) in 369: patchlval (); 370: push 0; (* push %rax *) 371: emitargs [] regs; 372: pop 0; (* pop %rax *) 373: if !align mod 2 <> 0 then 374: out 0x4883ec08; (* sub 8, %rsp *) 375: out 0xffd0; (* call *%rax *) 376: if !align mod 2 <> 0 then 377: out 0x4883c408 (* add 8, %rsp *) 378: t -> unnext t 379: 380: and expr stk = 381: let rec eqexpr () = 382: match next () with 383: Op "=" -> 384: patchlval (); 385: let ty = snd !lval in 386: push 0; (* push %rax *) 387: expr stk; 388: pop 1; (* pop %rcx *) 389: if ty = Int 390: then out 0x488901 (* mov %rax, (%rcx) *) 391: else out 0x8801; (* mov %al, (%rcx) *) 392: eqexpr () 393: t -> unnext t in 394: binary stk 10; 395: eqexpr () 396: 397: let rec decl g n stk = 398: match next () with 399: t when t = tokint -> 400: let top = 401: match stk with 402: (_, i) :: _ -> i 403: _ -> 0 in 404: let rec vars n stk = 405: while nextis (Op "*") 406: do ignore (next ()) done; 407: if nextis (Op ";") then (n, stk) else 408: match next () with 409: Sym s -> 410: let n' = n + 1 in 411: let stk' = 412: if g then ( 413: let glo = globs.(s) in 414: if glo.va >= 0 then 415: failwith "symbol defined twice"; 416: let va = !gpos+textoff + base in 417: globs.(s) <- { glo with va }; 418: gpos := !gpos + 8; 419: stk) 420: else 421: (s, top - 8*n') :: stk in 422: if not (nextis (Op ",")) 423: then (n', stk') else begin 424: ignore (next ()); 425: vars n' stk' 426: end 427: _ -> failwith "[var] expected in [decl]" in 428: let (m, stk') = vars 0 stk in 429: ignore (next ()); 430: if !dbg then 431: Printf.eprintf "end of decl (%d vars)\n" n; 432: decl g (n+m) stk' 433: t -> 434: unnext t; 435: if not g && n <> 0 then begin 436: assert (n * 8 < 256); 437: out 0x4883ec; 438: out (n * 8); (* sub $n*8, %rsp *) 439: align := !align+n 440: end; 441: if !dbg && not g then 442: prerr_endline "end of blk decls"; 443: (n, stk) 444: 445: let retl = ref 0 446: 447: let rec stmt brk stk = 448: let pexpr stk = 449: ignore (next ()); (* XXX expect ( *) 450: expr stk; 451: ignore (next ()) in (* XXX expect ) *) 452: match next () with 453: t when t = tokif -> 454: pexpr stk; 455: let loc = test 0 0 in 456: stmt brk stk; 457: let loc = 458: if not (nextis tokelse) 459: then loc else begin 460: ignore (next ()); 461: out 0xe9; (* jmp *) 462: let l = !opos in 463: le 32 0; 464: patch true loc !opos; 465: stmt brk stk; 466: l 467: end in 468: patch true loc !opos 469: t when t = tokwhile || t = tokfor -> 470: let (bl, ba) = (ref 0, !align) in 471: let (bdy, itr) = 472: if t = tokwhile then ( 473: let loc = !opos in 474: pexpr stk; 475: bl := test 0 0; 476: (0, loc)) 477: else ( 478: ignore (next ()); 479: if not (nextis (Op ";")) then 480: expr stk; 481: ignore (next ()); 482: let top = !opos in 483: if not (nextis (Op ";")) then begin 484: expr stk; 485: bl := test 0 0 486: end else 487: bl := 0; 488: ignore (next ()); 489: out 0xe9; 490: let bdy = !opos in 491: le 32 0; 492: let itr = !opos in 493: expr stk; 494: ignore (next ()); 495: out 0xe9; 496: le 32 (top - !opos - 4); 497: (bdy, itr)) in 498: patch true bdy !opos; 499: stmt (bl, ba) stk; 500: out 0xe9; (* jmp *) 501: le 32 (itr - !opos - 4); 502: patch true !bl !opos 503: t when t = tokret -> 504: if not (nextis (Op ";")) then 505: expr stk; 506: ignore (next ()); (* XXX expect here *) 507: out 0xe9; (* jmp *) 508: let loc = !opos in 509: le 32 !retl; 510: retl := loc 511: t when t = tokbreak -> 512: ignore (next ()); 513: let (brkl, brka) = brk in 514: let n = !align - brka in 515: assert (n >= 0); 516: if n <> 0 then begin 517: out 0x4883c4; (* add $n*8, %rsp *) 518: out (n * 8) 519: end; 520: out 0xe9; 521: let loc = !opos in 522: le 32 !brkl; 523: brkl := loc 524: Op "{" -> block brk stk 525: Op ";" -> () 526: t -> 527: unnext t; 528: expr stk; 529: ignore (next ()) (* use expect XXX *) 530: 531: and block brk stk = 532: let (n, stk') = decl false 0 stk in 533: while not (nextis (Op "}")) do 534: stmt brk stk'; 535: done; 536: ignore (next ()); 537: if n <> 0 then begin 538: out 0x4883c4; 539: out (n * 8); (* add $n*8, %rsp *) 540: align := !align-n 541: end 542: 543: let rec top () = 544: if not (nextis (Op "EOF!")) then 545: if nextis tokint 546: then (ignore (decl true 0 []); top ()) 547: else match next () with 548: Sym f -> 549: let g = globs.(f) in 550: if g.va >= 0 then 551: failwith "symbol defined twice"; 552: globs.(f) <- { g with va = !opos }; 553: let regs = [ 7; 6; 2; 1; 8; 9 ] in 554: let rec emitargs regs n stk = 555: match next () with 556: Sym i -> 557: let r = List.hd regs in 558: push r; 559: if nextis (Op ",") then 560: ignore (next ()); 561: let stk' = (i, -n*8) :: stk in 562: emitargs (List.tl regs) (n+1) stk' 563: Op ")" -> stk 564: _ -> failwith "[var] or ) expected" in 565: ignore (next ()); (* expect here XXX *) 566: align := 0; 567: out 0x55; (* push %rbp NO push! *) 568: out 0x4889e5; (* mov %rsp, %rbp *) 569: let stk = emitargs regs 1 [] in 570: while next () <> Op "{" 571: do () done; 572: retl := 0; 573: block (ref 0, 0) stk; 574: patch true !retl !opos; 575: out 0xc9c3; (* leave; ret *) 576: if !dbg then 577: Printf.eprintf "done with function %s\n" (symstr f); 578: top () 579: _ -> failwith "[decl] or [fun] expected" 580: 581: let elfhdr = 582: String.concat "" 583: [ "\x7fELF\x02\x01\x01\x00"; (* e_ident, 64bits, little endian *) 584: "\x00\x00\x00\x00\x00\x00\x00\x00"; 585: "\x02\x00"; (* e_type, ET_EXEC *) 586: "\x3e\x00"; (* e_machine, EM_X86_64 *) 587: "\x01\x00\x00\x00"; (* e_version, EV_CURRENT*) 588: "\x00\x00\x00\x00\x00\x00\x00\x00"; (* e_entry *) 589: "\x40\x00\x00\x00\x00\x00\x00\x00"; (* e_phoff *) 590: "\x00\x00\x00\x00\x00\x00\x00\x00"; (* e_shoff *) 591: "\x00\x00\x00\x00"; (* e_flags *) 592: "\x40\x00"; (* e_hsize *) 593: "\x38\x00"; (* e_phentsize *) 594: "\x03\x00"; (* e_phnum *) 595: "\x40\x00"; (* e_shentsize *) 596: "\x00\x00"; (* e_shnum *) 597: "\x00\x00"; (* e_shstrndx *) 598: ] 599: 600: let elfphdr ty off sz align = 601: le 32 ty; (* p_type *) 602: le 32 7; (* p_flags, RWX *) 603: le 64 off; (* p_offset *) 604: le 64 (off + base); (* p_vaddr *) 605: le 64 (off + base); (* p_paddr *) 606: le 64 sz; (* p_filesz *) 607: le 64 sz; (* p_memsz *) 608: le 64 align (* p_align *) 609: 610: let elfgen outf = 611: let entry = !opos in 612: let main = addsym "main" in 613: let gmain = globs.(main) in 614: out 0x488b3c24; (* mov (%rsp), %rdi *) 615: out 0x488d742408; (* lea 8(%rsp), %rsi *) 616: out 0x48b8; 617: le 64 gmain.loc; (* mov main, %rax *) 618: globs.(main) <- { gmain with loc = !opos-8 }; 619: out 0xffd0; (* call *%rax *) 620: out 0x89c7; (* mov %eax, %edi *) 621: load 0 0x3c; 622: out 0x0f05; (* syscall *) 623: let off = textoff + !gpos in 624: let itr f = 625: symitr (fun i s -> 626: let g = globs.(i) in 627: if g.va < 0 && g.loc <> 0 then 628: f s (String.length s) g.loc) in 629: let va x = x+off + base in 630: let patchloc i _ = 631: let g = globs.(i) in 632: if g.va >= 0 && g.va < base then 633: patch false g.loc (va g.va) 634: else if g.va >= 0 then 635: patch false g.loc g.va in 636: symitr patchloc; 637: let strtab = !opos in 638: incr opos; (* initial 0 *) 639: let dl = "/lib64/ld-linux-x86-64.so.2\x00libc.so.6" 640: and dllen = 27+1+9 in 641: String.blit dl 0 obuf !opos dllen; 642: opos := !opos + dllen+1; 643: itr (fun s sl _ -> 644: String.blit s 0 obuf !opos sl; 645: opos := !opos + sl+1); 646: opos := (!opos + 7) land -8; 647: let symtab = !opos 648: and n = ref (dllen+2) in 649: opos := !opos + 24; (* first is reserved *) 650: itr (fun _ sl _ -> 651: le 32 !n; (* st_name *) 652: le 32 0x10; (* global | notype *) 653: le 64 0; (* st_value *) 654: le 64 0; (* st_size *) 655: n := !n + sl+1); 656: let rel = !opos 657: and n = ref 1 in 658: itr (fun _ _ l -> 659: let rec genrel l = 660: if l <> 0 then begin 661: le 64 (va l); 662: le 64 (1 + !n lsl 32); (* R_X86_64_64 *) 663: le 64 0; 664: genrel (get32 l) 665: end in 666: genrel l; 667: incr n); 668: let hash = !opos in 669: let n = (rel-symtab)/24 - 1 in 670: le 32 1; (* nbucket *) 671: le 32 (n+1); (* nchain *) 672: le 32 (if n>0 then 1 else 0); 673: for i=1 to n do 674: le 32 i 675: done; 676: le 32 0; 677: let dyn = !opos in 678: List.iter (le 64) [ 679: 1; 29; (* DT_NEEDED libc.so.6 *) 680: 4; (va hash); (* DT_HASH *) 681: 5; (va strtab); (* DT_STRTAB *) 682: 6; (va symtab); (* DT_SYMTAB *) 683: 7; (va rel); (* DT_RELA *) 684: 8; (hash-rel); (* DT_RELASZ *) 685: 9; 24; (* DT_RELAENT *) 686: 10; (symtab-strtab); (* DT_STRSZ *) 687: 11; 24; (* DT_SYMENT *) 688: 0; (* 0; *) (* DT_NULL *) 689: ]; 690: let tend = !opos in 691: String.blit obuf 0 obuf off tend; 692: String.blit glo 0 obuf textoff !gpos; 693: String.blit elfhdr 0 obuf 0 64; 694: opos := 64; 695: elfphdr 3 (strtab+1+off) 28 1; (* PT_INTERP *) 696: elfphdr 1 0 (tend+off) 0x200000; (* PT_LOAD *) 697: elfphdr 2 (dyn+off) (tend-dyn) 8; (* PT_DYNAMIC *) 698: assert (!opos = textoff); 699: patch false 24 (va entry); 700: output_string outf (String.sub obuf 0 (tend+off)) 701: 702: let main () = 703: let doone c stk = 704: opos := 0; c stk; 705: print_string (String.sub obuf 0 !opos) in 706: let ppsym = function 707: Op s -> Printf.printf "Operator '%s'\n" s 708: ILit i -> Printf.printf "Int literal %d\n" i 709: SLit (_, s) -> Printf.printf "Str literal %S\n" s 710: Sym i -> Printf.printf "Symbol '%s' (%d)\n" (symstr i) i in 711: let rec pptoks () = 712: match next () with 713: Op "EOF!" -> Printf.printf "End of input stream\n" 714: tok -> ppsym tok; pptoks () in 715: 716: match 717: if Array.length Sys.argv < 2 718: then "-blk" else Sys.argv.(1) 719: with 720: "-lex" -> pptoks () 721: "-blk" -> doone (block (ref 0, 0)) [] 722: f -> 723: let oc = open_out "out" in 724: inch := open_in f; 725: top (); elfgen oc; 726: close_out oc 727: 728: let _ = main ()
This document was generated using caml2html