3 # This code is licensed under GPLv2 or later; go to gnu.org to read it
4 # (not that it much matters for an asm preprocessor)
5 # usage: set your assembler to be something like "perl gas-preprocessor.pl gcc"
8 # Apple's gas is ancient and doesn't support modern preprocessing features like
9 # .rept and has ugly macro syntax, among other things. Thus, this script
10 # implements the subset of the gas preprocessor used by x264 and ffmpeg
11 # that isn't supported by Apple's gas.
13 my %canonical_arch = ("aarch64" => "aarch64", "arm64" => "aarch64",
15 "powerpc" => "powerpc", "ppc" => "powerpc");
17 my %comments = ("aarch64" => '//',
26 my $as_type = "apple-gas";
28 my $fix_unreq = $^O eq "darwin";
31 my $arm_cond_codes = "eq|ne|cs|cc|mi|pl|vs|vc|hi|ls|ge|lt|gt|le|al|hs|lo";
35 Gas-preprocessor.pl converts assembler files using modern GNU as syntax for
36 Apple's ancient gas version or clang's incompatible integrated assembler. The
37 conversion is regularly tested for Libav, x264 and vlc. Other projects might
38 use different features which are not correctly handled.
40 Options for this program needs to be separated with ' -- ' from the assembler
41 command. Following options are currently supported:
43 -help - this usage text
44 -arch - target architecture
45 -as-type - one value out of {{,apple-}{gas,clang},armasm}
48 -force-thumb - assemble as thumb regardless of the input source
49 (note, this is incomplete and only works for sources
50 it explicitly was tested with)
60 if ($opt =~ /^-(no-)?fix-unreq$/) {
61 $fix_unreq = $1 ne "no-";
62 } elsif ($opt eq "-force-thumb") {
64 } elsif ($opt eq "-arch") {
66 die "unknown arch: '$arch'\n" if not exists $comments{$arch};
67 } elsif ($opt eq "-as-type") {
69 die "unknown as type: '$as_type'\n" if $as_type !~ /^((apple-)?(gas|clang)|armasm)$/;
70 } elsif ($opt eq "-help") {
73 } elsif ($opt eq "--" ) {
75 } elsif ($opt =~ /^-/) {
76 die "option '$opt' is not known. See '$0 -help' for usage information\n";
78 push @gcc_cmd, $opt, @ARGV;
83 if (grep /\.c$/, @gcc_cmd) {
84 # C file (inline asm?) - compile
85 @preprocess_c_cmd = (@gcc_cmd, "-S");
86 } elsif (grep /\.[sS]$/, @gcc_cmd) {
87 # asm file, just do C preprocessor
88 @preprocess_c_cmd = (@gcc_cmd, "-E");
89 } elsif (grep /-(v|h|-version|dumpversion)/, @gcc_cmd) {
90 # pass -v/--version along, used during probing. Matching '-v' might have
91 # unintended results but it doesn't matter much if gas-preprocessor or
95 die "Unrecognized input filetype";
97 if ($as_type eq "armasm") {
99 $preprocess_c_cmd[0] = "cpp";
100 push(@preprocess_c_cmd, "-U__ELF__");
101 push(@preprocess_c_cmd, "-U__MACH__");
103 @preprocess_c_cmd = grep ! /^-nologo$/, @preprocess_c_cmd;
104 # Remove -ignore XX parameter pairs from preprocess_c_cmd
106 while ($index < $#preprocess_c_cmd) {
107 if ($preprocess_c_cmd[$index] eq "-ignore" and $index + 1 < $#preprocess_c_cmd) {
108 splice(@preprocess_c_cmd, $index, 2);
113 if (grep /^-MM$/, @preprocess_c_cmd) {
114 system(@preprocess_c_cmd) == 0 or die "Error running preprocessor";
119 # if compiling, avoid creating an output file named '-.o'
120 if ((grep /^-c$/, @gcc_cmd) && !(grep /^-o/, @gcc_cmd)) {
121 foreach my $i (@gcc_cmd) {
122 if ($i =~ /\.[csS]$/) {
124 $outputfile =~ s/\.[csS]$/.o/;
125 push(@gcc_cmd, "-o");
126 push(@gcc_cmd, $outputfile);
131 # replace only the '-o' argument with '-', avoids rewriting the make dependency
132 # target specified with -MT to '-'
134 while ($index < $#preprocess_c_cmd) {
135 if ($preprocess_c_cmd[$index] eq "-o") {
137 $preprocess_c_cmd[$index] = "-";
143 if ($as_type ne "armasm") {
144 @gcc_cmd = map { /\.[csS]$/ ? qw(-x assembler -) : $_ } @gcc_cmd;
146 @preprocess_c_cmd = grep ! /^-c$/, @preprocess_c_cmd;
147 @preprocess_c_cmd = grep ! /^-m/, @preprocess_c_cmd;
149 @preprocess_c_cmd = grep ! /^-G/, @preprocess_c_cmd;
150 @preprocess_c_cmd = grep ! /^-W/, @preprocess_c_cmd;
151 @preprocess_c_cmd = grep ! /^-Z/, @preprocess_c_cmd;
152 @preprocess_c_cmd = grep ! /^-fp/, @preprocess_c_cmd;
153 @preprocess_c_cmd = grep ! /^-EHsc$/, @preprocess_c_cmd;
154 @preprocess_c_cmd = grep ! /^-O/, @preprocess_c_cmd;
156 @gcc_cmd = grep ! /^-G/, @gcc_cmd;
157 @gcc_cmd = grep ! /^-W/, @gcc_cmd;
158 @gcc_cmd = grep ! /^-Z/, @gcc_cmd;
159 @gcc_cmd = grep ! /^-fp/, @gcc_cmd;
160 @gcc_cmd = grep ! /^-EHsc$/, @gcc_cmd;
161 @gcc_cmd = grep ! /^-O/, @gcc_cmd;
163 my @outfiles = grep /\.(o|obj)$/, @gcc_cmd;
164 $tempfile = $outfiles[0].".asm";
166 # Remove most parameters from gcc_cmd, which actually is the armasm command,
167 # which doesn't support any of the common compiler/preprocessor options.
168 @gcc_cmd = grep ! /^-D/, @gcc_cmd;
169 @gcc_cmd = grep ! /^-U/, @gcc_cmd;
170 @gcc_cmd = grep ! /^-m/, @gcc_cmd;
171 @gcc_cmd = grep ! /^-M/, @gcc_cmd;
172 @gcc_cmd = grep ! /^-c$/, @gcc_cmd;
173 @gcc_cmd = grep ! /^-I/, @gcc_cmd;
174 @gcc_cmd = map { /\.S$/ ? $tempfile : $_ } @gcc_cmd;
177 # detect architecture from gcc binary name
179 if ($gcc_cmd[0] =~ /(arm64|aarch64|arm|powerpc|ppc)/) {
182 # look for -arch flag
183 foreach my $i (1 .. $#gcc_cmd-1) {
184 if ($gcc_cmd[$i] eq "-arch" and
185 $gcc_cmd[$i+1] =~ /(arm64|aarch64|arm|powerpc|ppc)/) {
192 # assume we're not cross-compiling if no -arch or the binary doesn't have the arch name
193 $arch = qx/arch/ if (!$arch);
195 die "Unknown target architecture '$arch'" if not exists $canonical_arch{$arch};
197 $arch = $canonical_arch{$arch};
198 $comm = $comments{$arch};
199 my $inputcomm = $comm;
200 $comm = ";" if $as_type =~ /armasm/;
202 my %ppc_spr = (ctr => 9,
205 open(INPUT, "-|", @preprocess_c_cmd) || die "Error running preprocessor";
207 if ($ENV{GASPP_DEBUG}) {
208 open(ASMFILE, ">&STDOUT");
210 if ($as_type ne "armasm") {
211 open(ASMFILE, "|-", @gcc_cmd) or die "Error running assembler";
213 open(ASMFILE, ">", $tempfile);
217 my $current_macro = '';
222 my %macro_args_default;
239 my %literal_labels; # for ldr <reg>, =<expr>
241 my $literal_expr = ".word";
242 $literal_expr = ".quad" if $arch eq "aarch64";
253 my $temp_label_next = 0;
254 my %last_temp_labels;
255 my %next_temp_labels;
259 my %aarch64_req_alias;
262 parse_line(".thumb\n");
265 # pass 1: parse .macro
266 # note that the handling of arguments is probably overly permissive vs. gas
267 # but it should be the same for valid cases
269 # remove lines starting with '#', preprocessing is done, '#' at start of
270 # the line indicates a comment for all supported archs (aarch64, arm, ppc
271 # and x86). Also strips line number comments but since they are off anyway
274 # remove all comments (to avoid interfering with evaluating directives)
275 s/(?<!\\)$inputcomm.*//x;
276 # Strip out windows linefeeds
279 foreach my $subline (split(";", $_)) {
280 # Add newlines at the end of lines that don't already have one
283 parse_line($subline);
289 while ($expr =~ /([A-Za-z._][A-Za-z0-9._]*)/g) {
291 $expr =~ s/$sym/($symbols{$sym})/ if defined $symbols{$sym};
298 # handle .if directives; apple's assembler doesn't support important non-basic ones
299 # evaluating them is also needed to handle recursive macros
300 if ($line =~ /\.if(n?)([a-z]*)\s+(.*)/) {
301 my $result = $1 eq "n";
307 $result ^= $expr eq "";
308 } elsif ($type eq "c") {
309 if ($expr =~ /(.*)\s*,\s*(.*)/) {
312 die "argument to .ifc not recognized";
314 } elsif ($type eq "") {
315 $result ^= eval_expr($expr) != 0;
316 } elsif ($type eq "eq") {
317 $result = eval_expr($expr) == 0;
318 } elsif ($type eq "lt") {
319 $result = eval_expr($expr) < 0;
322 die "unhandled .if varient. \"$line\"";
324 push (@ifstack, $result);
334 # evaluate .if blocks
335 if (scalar(@ifstack)) {
336 # Don't evaluate any new if statements if we're within
337 # a repetition or macro - they will be evaluated once
338 # the repetition is unrolled or the macro is expanded.
339 if (scalar(@rept_lines) == 0 and $macro_level == 0) {
340 if ($line =~ /\.endif/) {
343 } elsif ($line =~ /\.elseif\s+(.*)/) {
344 if ($ifstack[-1] == 0) {
345 $ifstack[-1] = !!eval_expr($1);
346 } elsif ($ifstack[-1] > 0) {
347 $ifstack[-1] = -$ifstack[-1];
350 } elsif ($line =~ /\.else/) {
351 $ifstack[-1] = !$ifstack[-1];
353 } elsif (handle_if($line)) {
358 # discard lines in false .if blocks
359 foreach my $i (0 .. $#ifstack) {
360 if ($ifstack[$i] <= 0) {
371 return if (parse_if_line($line));
373 if (scalar(@rept_lines) == 0) {
376 if ($macro_level > 1 && !$current_macro) {
377 die "nested macros but we don't have master macro";
381 if ($macro_level < 0) {
382 die "unmatched .endm";
383 } elsif ($macro_level == 0) {
390 if ($macro_level == 0) {
391 if ($line =~ /\.(rept|irp)/) {
393 } elsif ($line =~ /.endr/) {
398 if ($macro_level > 1) {
399 push(@{$macro_lines{$current_macro}}, $line);
400 } elsif (scalar(@rept_lines) and $rept_level >= 1) {
401 push(@rept_lines, $line);
402 } elsif ($macro_level == 0) {
403 expand_macros($line);
405 if ($line =~ /\.macro\s+([\d\w\.]+)\s*,?\s*(.*)/) {
408 # commas in the argument list are optional, so only use whitespace as the separator
412 my @args = split(/\s+/, $arglist);
413 foreach my $i (0 .. $#args) {
414 my @argpair = split(/=/, $args[$i]);
415 $macro_args{$current_macro}[$i] = $argpair[0];
416 $argpair[0] =~ s/:vararg$//;
417 $macro_args_default{$current_macro}{$argpair[0]} = $argpair[1];
419 # ensure %macro_lines has the macro name added as a key
420 $macro_lines{$current_macro} = [];
422 } elsif ($current_macro) {
423 push(@{$macro_lines{$current_macro}}, $line);
425 die "macro level without a macro name";
432 if ($line =~ /\.set\s+(.*),\s*(.*)/) {
433 $symbols{$1} = eval_expr($2);
442 # handle .if directives; apple's assembler doesn't support important non-basic ones
443 # evaluating them is also needed to handle recursive macros
444 if (handle_if($line)) {
448 if (/\.purgem\s+([\d\w\.]+)/) {
449 delete $macro_lines{$1};
450 delete $macro_args{$1};
451 delete $macro_args_default{$1};
455 if ($line =~ /\.altmacro/) {
460 if ($line =~ /\.noaltmacro/) {
465 $line =~ s/\%([^,]*)/eval_expr($1)/eg if $altmacro;
467 # Strip out the .set lines from the armasm output
468 return if (handle_set($line) and $as_type eq "armasm");
470 if ($line =~ /\.rept\s+(.*)/) {
472 @rept_lines = ("\n");
474 # handle the possibility of repeating another directive on the same line
475 # .endr on the same line is not valid, I don't know if a non-directive is
476 if ($num_repts =~ s/(\.\w+.*)//) {
477 push(@rept_lines, "$1\n");
479 $num_repts = eval_expr($num_repts);
480 } elsif ($line =~ /\.irp\s+([\d\w\.]+)\s*(.*)/) {
483 @rept_lines = ("\n");
486 # only use whitespace as the separator
487 my $irp_arglist = $2;
488 $irp_arglist =~ s/,/ /g;
489 $irp_arglist =~ s/^\s+//;
490 @irp_args = split(/\s+/, $irp_arglist);
491 } elsif ($line =~ /\.irpc\s+([\d\w\.]+)\s*(.*)/) {
494 @rept_lines = ("\n");
497 my $irp_arglist = $2;
498 $irp_arglist =~ s/,/ /g;
499 $irp_arglist =~ s/^\s+//;
500 @irp_args = split(//, $irp_arglist);
501 } elsif ($line =~ /\.endr/) {
502 my @prev_rept_lines = @rept_lines;
503 my $prev_in_irp = $in_irp;
504 my @prev_irp_args = @irp_args;
505 my $prev_irp_param = $irp_param;
506 my $prev_num_repts = $num_repts;
511 if ($prev_in_irp != 0) {
512 foreach my $i (@prev_irp_args) {
513 foreach my $origline (@prev_rept_lines) {
514 my $line = $origline;
515 $line =~ s/\\$prev_irp_param/$i/g;
516 $line =~ s/\\\(\)//g; # remove \()
521 for (1 .. $prev_num_repts) {
522 foreach my $origline (@prev_rept_lines) {
523 my $line = $origline;
528 } elsif ($line =~ /(\S+:|)\s*([\w\d\.]+)\s*(.*)/ && exists $macro_lines{$2}) {
529 handle_serialized_line($1);
532 # commas are optional here too, but are syntactically important because
533 # parameters can be blank
534 my @arglist = split(/,/, $3);
538 my $comma_sep_required = 0;
540 # allow arithmetic/shift operators in macro arguments
541 $_ =~ s/\s*(\+|-|\*|\/|<<|>>|<|>)\s*/$1/g;
543 my @whitespace_split = split(/\s+/, $_);
544 if (!@whitespace_split) {
546 push(@args_seperator, '');
548 foreach (@whitespace_split) {
549 #print ("arglist = \"$_\"\n");
552 my $sep = $comma_sep_required ? "," : " ";
553 push(@args_seperator, $sep);
554 #print ("sep = \"$sep\", arg = \"$_\"\n");
555 $comma_sep_required = 0;
560 $comma_sep_required = 1;
564 if ($macro_args_default{$macro}){
565 %replacements = %{$macro_args_default{$macro}};
568 # construct hashtable of text to replace
569 foreach my $i (0 .. $#args) {
570 my $argname = $macro_args{$macro}[$i];
571 my @macro_args = @{ $macro_args{$macro} };
572 if ($args[$i] =~ m/=/) {
573 # arg=val references the argument name
574 # XXX: I'm not sure what the expected behaviour is if a lot of
575 # these are mixed with unnamed args
576 my @named_arg = split(/=/, $args[$i]);
577 $replacements{$named_arg[0]} = $named_arg[1];
578 } elsif ($i > $#{$macro_args{$macro}}) {
579 # more args given than the macro has named args
580 # XXX: is vararg allowed on arguments before the last?
581 $argname = $macro_args{$macro}[-1];
582 if ($argname =~ s/:vararg$//) {
583 #print "macro = $macro, args[$i] = $args[$i], args_seperator=@args_seperator, argname = $argname, arglist[$i] = $arglist[$i], arglist = @arglist, args=@args, macro_args=@macro_args\n";
584 #$replacements{$argname} .= ", $args[$i]";
585 $replacements{$argname} .= "$args_seperator[$i] $args[$i]";
587 die "Too many arguments to macro $macro";
590 $argname =~ s/:vararg$//;
591 $replacements{$argname} = $args[$i];
595 my $count = $macro_count++;
597 # apply replacements as regex
598 foreach (@{$macro_lines{$macro}}) {
600 # do replacements by longest first, this avoids wrong replacement
601 # when argument names are subsets of each other
602 foreach (reverse sort {length $a <=> length $b} keys %replacements) {
603 $macro_line =~ s/\\$_/$replacements{$_}/g;
606 foreach (reverse sort {length $a <=> length $b} keys %replacements) {
607 $macro_line =~ s/\b$_\b/$replacements{$_}/g;
610 $macro_line =~ s/\\\@/$count/g;
611 $macro_line =~ s/\\\(\)//g; # remove \()
612 parse_line($macro_line);
615 handle_serialized_line($line);
619 sub is_arm_register {
623 $name =~ /^[rav]\d+$/) {
629 sub handle_local_label {
633 my $target = "$num$dir";
635 $line =~ s/$target/$last_temp_labels{$num}/g;
637 my $name = "temp_label_$temp_label_next";
639 push(@{$next_temp_labels{$num}}, $name);
640 $line =~ s/$target/$name/g;
645 sub handle_serialized_line {
648 # handle .previous (only with regard to .section not .subsection)
649 if ($line =~ /\.(section|text|const_data)/) {
650 push(@sections, $line);
651 } elsif ($line =~ /\.previous/) {
652 if (!$sections[-2]) {
653 die ".previous without a previous section";
655 $line = $sections[-2];
656 push(@sections, $line);
659 $thumb = 1 if $line =~ /\.code\s+16|\.thumb/;
660 $thumb = 0 if $line =~ /\.code\s+32|\.arm/;
662 # handle ldr <reg>, =<expr>
663 if ($line =~ /(.*)\s*ldr([\w\s\d]+)\s*,\s*=(.*)/ and $as_type ne "armasm") {
664 my $label = $literal_labels{$3};
666 $label = "Literal_$literal_num";
668 $literal_labels{$3} = $label;
670 $line = "$1 ldr$2, $label\n";
671 } elsif ($line =~ /\.ltorg/ and $as_type ne "armasm") {
672 $line .= ".align 2\n";
673 foreach my $literal (keys %literal_labels) {
674 $line .= "$literal_labels{$literal}:\n $literal_expr $literal\n";
676 %literal_labels = ();
679 # handle GNU as pc-relative relocations for adrp/add
680 if ($line =~ /(.*)\s*adrp([\w\s\d]+)\s*,\s*#?:pg_hi21:([^\s]+)/) {
681 $line = "$1 adrp$2, ${3}\@PAGE\n";
682 } elsif ($line =~ /(.*)\s*add([\w\s\d]+)\s*,([\w\s\d]+)\s*,\s*#?:lo12:([^\s]+)/) {
683 $line = "$1 add$2, $3, ${4}\@PAGEOFF\n";
686 # thumb add with large immediate needs explicit add.w
687 if ($thumb and $line =~ /add\s+.*#([^@]+)/) {
688 $line =~ s/add/add.w/ if eval_expr($1) > 255;
691 # mach-o local symbol names start with L (no dot)
692 $line =~ s/(?<!\w)\.(L\w+)/$1/g;
694 # recycle the '.func' directive for '.thumb_func'
695 if ($thumb and $as_type =~ /^apple-/) {
696 $line =~ s/\.func/.thumb_func/x;
699 if ($thumb and $line =~ /^\s*(\w+)\s*:/) {
703 if ($as_type =~ /^apple-/ and
704 $line =~ /^\s*((\w+\s*:\s*)?bl?x?(..)?(?:\.w)?|\.global)\s+(\w+)/) {
707 # Don't interpret e.g. bic as b<cc> with ic as conditional code
708 if ($cond =~ /|$arm_cond_codes/) {
709 if (exists $thumb_labels{$label}) {
710 print ASMFILE ".thumb_func $label\n";
712 $call_targets{$label}++;
717 # @l -> lo16() @ha -> ha16()
718 $line =~ s/,\s+([^,]+)\@l\b/, lo16($1)/g;
719 $line =~ s/,\s+([^,]+)\@ha\b/, ha16($1)/g;
722 if ($line =~ /(\s+)(m[ft])([a-z]+)\s+(\w+)/ and exists $ppc_spr{$3}) {
724 $line = "$1${2}spr $ppc_spr{$3}, $4\n";
726 $line = "$1${2}spr $4, $ppc_spr{$3}\n";
730 if ($line =~ /\.unreq\s+(.*)/) {
731 if (defined $neon_alias_reg{$1}) {
732 delete $neon_alias_reg{$1};
733 delete $neon_alias_type{$1};
735 } elsif (defined $aarch64_req_alias{$1}) {
736 delete $aarch64_req_alias{$1};
740 # old gas versions store upper and lower case names on .req,
741 # but they remove only one on .unreq
743 if ($line =~ /\.unreq\s+(.*)/) {
744 $line = ".unreq " . lc($1) . "\n";
745 $line .= ".unreq " . uc($1) . "\n";
749 if ($line =~ /(\w+)\s+\.(dn|qn)\s+(\w+)(?:\.(\w+))?(\[\d+\])?/) {
750 $neon_alias_reg{$1} = "$3$5";
751 $neon_alias_type{$1} = $4;
754 if (scalar keys %neon_alias_reg > 0 && $line =~ /^\s+v\w+/) {
755 # This line seems to possibly have a neon instruction
756 foreach (keys %neon_alias_reg) {
758 # Require the register alias to match as an individual word, not as a substring
759 # of a larger word-token.
760 if ($line =~ /\b$alias\b/) {
761 $line =~ s/\b$alias\b/$neon_alias_reg{$alias}/g;
762 # Add the type suffix. If multiple aliases match on the same line,
763 # only do this replacement the first time (a vfoo.bar string won't match v\w+).
764 $line =~ s/^(\s+)(v\w+)(\s+)/$1$2.$neon_alias_type{$alias}$3/;
769 if ($arch eq "aarch64" or $as_type eq "armasm") {
770 # clang's integrated aarch64 assembler in Xcode 5 does not support .req/.unreq
771 if ($line =~ /\b(\w+)\s+\.req\s+(\w+)\b/) {
772 $aarch64_req_alias{$1} = $2;
775 foreach (keys %aarch64_req_alias) {
777 # recursively resolve aliases
778 my $resolved = $aarch64_req_alias{$alias};
779 while (defined $aarch64_req_alias{$resolved}) {
780 $resolved = $aarch64_req_alias{$resolved};
782 $line =~ s/\b$alias\b/$resolved/g;
785 if ($arch eq "aarch64") {
786 # fix missing aarch64 instructions in Xcode 5.1 (beta3)
787 # mov with vector arguments is not supported, use alias orr instead
788 if ($line =~ /^\s*mov\s+(v\d[\.{}\[\]\w]+),\s*(v\d[\.{}\[\]\w]+)\b\s*$/) {
789 $line = " orr $1, $2, $2\n";
791 # movi 16, 32 bit shifted variant, shift is optional
792 if ($line =~ /^\s*movi\s+(v[0-3]?\d\.(?:2|4|8)[hsHS])\s*,\s*(#\w+)\b\s*$/) {
793 $line = " movi $1, $2, lsl #0\n";
795 # Xcode 5 misses the alias uxtl. Replace it with the more general ushll.
796 # Clang 3.4 misses the alias sxtl too. Replace it with the more general sshll.
797 if ($line =~ /^\s*(s|u)xtl(2)?\s+(v[0-3]?\d\.[248][hsdHSD])\s*,\s*(v[0-3]?\d\.(?:2|4|8|16)[bhsBHS])\b\s*$/) {
798 $line = " $1shll$2 $3, $4, #0\n";
800 # clang 3.4 does not automatically use shifted immediates in add/sub
801 if ($as_type eq "clang" and
802 $line =~ /^(\s*(?:add|sub)s?) ([^#l]+)#([\d\+\-\*\/ <>]+)\s*$/) {
804 if ($imm > 4095 and not ($imm & 4095)) {
805 $line = "$1 $2#" . ($imm >> 12) . ", lsl #12\n";
808 if ($ENV{GASPP_FIX_XCODE5}) {
809 if ($line =~ /^\s*bsl\b/) {
810 $line =~ s/\b(bsl)(\s+v[0-3]?\d\.(\w+))\b/$1.$3$2/;
811 $line =~ s/\b(v[0-3]?\d)\.$3\b/$1/g;
813 if ($line =~ /^\s*saddl2?\b/) {
814 $line =~ s/\b(saddl2?)(\s+v[0-3]?\d\.(\w+))\b/$1.$3$2/;
815 $line =~ s/\b(v[0-3]?\d)\.\w+\b/$1/g;
817 if ($line =~ /^\s*dup\b.*\]$/) {
818 $line =~ s/\bdup(\s+v[0-3]?\d)\.(\w+)\b/dup.$2$1/g;
819 $line =~ s/\b(v[0-3]?\d)\.[bhsdBHSD](\[\d\])$/$1$2/g;
824 if ($as_type eq "armasm") {
825 # Also replace variables set by .set
826 foreach (keys %symbols) {
828 $line =~ s/\b$sym\b/$symbols{$sym}/g;
831 # Handle function declarations and keep track of the declared labels
832 if ($line =~ s/^\s*\.func\s+(\w+)/$1 PROC/) {
833 $labels_seen{$1} = 1;
836 if ($line =~ s/^\s*(\d+)://) {
837 # Convert local labels into unique labels. armasm (at least in
838 # RVCT) has something similar, but still different enough.
839 # By converting to unique labels we avoid any possible
843 foreach (@{$next_temp_labels{$num}}) {
844 $line = "$_\n" . $line;
846 @next_temp_labels{$num} = ();
847 my $name = "temp_label_$temp_label_next";
849 # The matching regexp above removes the label from the start of
850 # the line (which might contain an instruction as well), readd
851 # it on a separate line above it.
852 $line = "$name:\n" . $line;
853 $last_temp_labels{$num} = $name;
856 if ($line =~ s/^(\w+):/$1/) {
857 # Skip labels that have already been declared with a PROC,
858 # labels must not be declared multiple times.
859 return if (defined $labels_seen{$1});
860 $labels_seen{$1} = 1;
861 } elsif ($line !~ /(\w+) PROC/) {
862 # If not a label, make sure the line starts with whitespace,
863 # otherwise ms armasm interprets it incorrectly.
864 $line =~ s/^[\.\w]/\t$&/;
868 # Check branch instructions
869 if ($line =~ /(?:^|\n)\s*(\w+\s*:\s*)?(bl?x?(..)?(\.w)?)\s+(\w+)/) {
874 # Don't interpret e.g. bic as b<cc> with ic as conditional code
875 if ($cond !~ /|$arm_cond_codes/) {
876 # Not actually a branch
877 } elsif ($target =~ /(\d+)([bf])/) {
878 # The target is a local label
879 $line = handle_local_label($line, $1, $2);
880 $line =~ s/\b$instr\b/$&.w/ if $width eq "";
881 } elsif (!is_arm_register($target)) {
882 $call_targets{$target}++;
884 } elsif ($line =~ /^\s*.h?word.*\b\d+[bf]\b/) {
885 while ($line =~ /\b(\d+)([bf])\b/g) {
886 $line = handle_local_label($line, $1, $2);
890 # ALIGN in armasm syntax is the actual number of bytes
891 if ($line =~ /\.align\s+(\d+)/) {
893 $line =~ s/\.align\s(\d+)/ALIGN $align/;
895 # Convert gas style [r0, :128] into armasm [r0@128] alignment specification
896 $line =~ s/\[([^\[]+),\s*:(\d+)\]/[$1\@$2]/g;
898 # armasm treats logical values {TRUE} and {FALSE} separately from
899 # numeric values - logical operators and values can't be intermixed
900 # with numerical values. Evaluate !<number> and (a <> b) into numbers,
901 # let the assembler evaluate the rest of the expressions. This currently
902 # only works for cases when ! and <> are used with actual constant numbers,
903 # we don't evaluate subexpressions here.
906 while ($line =~ /!\s*(\d+)/g) {
907 my $val = ($1 != 0) ? 0 : 1;
908 $line =~ s/!(\d+)/$val/;
911 while ($line =~ /\(\s*(\d+)\s*([<>])\s*(\d+)\s*\)/) {
914 $val = ($1 < $3) ? 1 : 0;
916 $val = ($1 > $3) ? 1 : 0;
918 $line =~ s/\(\s*(\d+)\s*([<>])\s*(\d+)\s*\)/$val/;
921 # Change a movw... #:lower16: into a mov32 pseudoinstruction
922 $line =~ s/^(\s*)movw(\s+\w+\s*,\s*)\#:lower16:(.*)$/$1mov32$2$3/;
923 # and remove the following, matching movt completely
924 $line =~ s/^\s*movt\s+\w+\s*,\s*\#:upper16:.*$//;
926 if ($line =~ /^\s*mov32\s+\w+,\s*([a-zA-Z]\w*)/) {
927 $mov32_targets{$1}++;
930 # Misc bugs/deficiencies:
931 # armasm seems unable to parse e.g. "vmov s0, s1" without a type
932 # qualifier, thus add .f32.
933 $line =~ s/^(\s+(?:vmov|vadd))(\s+s)/$1.f32$2/;
934 # armasm is unable to parse &0x - add spacing
935 $line =~ s/&0x/& 0x/g;
939 # Convert register post indexing to a separate add instruction.
940 # This converts e.g. "ldr r0, [r1], r2" into "ldr r0, [r1]",
942 $line =~ s/(ldr|str)\s+(\w+),\s*\[(\w+)\],\s*(\w+)/$1 $2, [$3]\n\tadd $3, $3, $4/g;
944 # Convert "mov pc, lr" into "bx lr", since the former only works
945 # for switching from arm to thumb (and only in armv7), but not
947 s/mov\s*pc\s*,\s*lr/bx lr/g;
949 # Convert stmdb/ldmia with only one register into a plain str/ldr with post-increment/decrement
950 $line =~ s/stmdb\s+sp!\s*,\s*\{([^,-]+)\}/str $1, [sp, #-4]!/g;
951 $line =~ s/ldmia\s+sp!\s*,\s*\{([^,-]+)\}/ldr $1, [sp], #4/g;
953 $line =~ s/\.arm/.thumb/x;
956 # comment out unsupported directives
957 $line =~ s/\.type/$comm$&/x if $as_type =~ /^(apple-|armasm)/;
958 $line =~ s/\.func/$comm$&/x if $as_type =~ /^(apple-|clang)/;
959 $line =~ s/\.endfunc/$comm$&/x if $as_type =~ /^(apple-|clang)/;
960 $line =~ s/\.endfunc/ENDP/x if $as_type =~ /armasm/;
961 $line =~ s/\.ltorg/$comm$&/x if $as_type =~ /^(apple-|clang)/;
962 $line =~ s/\.ltorg/LTORG/x if $as_type eq "armasm";
963 $line =~ s/\.size/$comm$&/x if $as_type =~ /^(apple-|armasm)/;
964 $line =~ s/\.fpu/$comm$&/x if $as_type =~ /^(apple-|armasm)/;
965 $line =~ s/\.arch/$comm$&/x if $as_type =~ /^(apple-|clang|armasm)/;
966 $line =~ s/\.object_arch/$comm$&/x if $as_type =~ /^(apple-|armasm)/;
967 $line =~ s/.section\s+.note.GNU-stack.*/$comm$&/x if $as_type =~ /^(apple-|armasm)/;
969 $line =~ s/\.syntax/$comm$&/x if $as_type =~ /armasm/;
971 $line =~ s/\.hword/.short/x;
973 if ($as_type =~ /^apple-/) {
974 # the syntax for these is a little different
975 $line =~ s/\.global/.globl/x;
976 # also catch .section .rodata since the equivalent to .const_data is .section __DATA,__const
977 $line =~ s/(.*)\.rodata/.const_data/x;
978 $line =~ s/\.int/.long/x;
979 $line =~ s/\.float/.single/x;
981 if ($as_type eq "armasm") {
982 $line =~ s/\.global/EXPORT/x;
983 $line =~ s/\.int/dcd/x;
984 $line =~ s/\.long/dcd/x;
985 $line =~ s/\.float/dcfs/x;
986 $line =~ s/\.word/dcd/x;
987 $line =~ s/\.short/dcw/x;
988 $line =~ s/\.byte/dcb/x;
989 $line =~ s/\.thumb/THUMB/x;
990 $line =~ s/\.arm/ARM/x;
991 # The alignment in AREA is the power of two, just as .align in gas
992 $line =~ s/\.text/AREA |.text|, CODE, READONLY, ALIGN=2, CODEALIGN/;
993 $line =~ s/(\s*)(.*)\.rodata/$1AREA |.rodata|, DATA, READONLY, ALIGN=5/;
995 $line =~ s/fmxr/vmsr/;
996 $line =~ s/fmrx/vmrs/;
997 $line =~ s/fadds/vadd.f32/;
1000 # catch unknown section names that aren't mach-o style (with a comma)
1001 if ($as_type =~ /apple-/ and $line =~ /.section ([^,]*)$/) {
1002 die ".section $1 unsupported; figure out the mach-o section name and add it";
1005 print ASMFILE $line;
1008 if ($as_type ne "armasm") {
1009 print ASMFILE ".text\n";
1010 print ASMFILE ".align 2\n";
1011 foreach my $literal (keys %literal_labels) {
1012 print ASMFILE "$literal_labels{$literal}:\n $literal_expr $literal\n";
1015 map print(ASMFILE ".thumb_func $_\n"),
1016 grep exists $thumb_labels{$_}, keys %call_targets;
1018 map print(ASMFILE "\tIMPORT $_\n"),
1019 grep ! exists $labels_seen{$_}, (keys %call_targets, keys %mov32_targets);
1021 print ASMFILE "\tEND\n";
1024 close(INPUT) or exit 1;
1025 close(ASMFILE) or exit 1;
1026 if ($as_type eq "armasm" and ! defined $ENV{GASPP_DEBUG}) {
1027 system(@gcc_cmd) == 0 or die "Error running assembler";
1031 unlink($tempfile) if defined $tempfile;