Skip to content

Commit 947eef8

Browse files
committed
[AArch64] Refactor @plt, @gotpcrel, and @AUTH to use parseDataExpr
Following PR llvm#132569, which added `parseDataExpr` for parsing expressions in data directives (e.g., `.word`), this PR migrates AArch64 `@plt`, `@gotpcrel`, and `@AUTH` from the `parsePrimaryExpr` workaround to `parseDataExpr`. The goal is to align with the GNU assembler model, where relocation specifiers apply to the entire operand rather than to individual terms. This reduces complexity, which is especially evident in `@AUTH` parsing. Note: AArch64 ELF lacks an official syntax for data directives (llvm#132570); a prefix notation might be a preferable future direction. In the test elf-reloc-ptrauth.s, many errors are now reported at parse time.
1 parent 36978fa commit 947eef8

File tree

10 files changed

+137
-135
lines changed

10 files changed

+137
-135
lines changed

llvm/include/llvm/MC/MCParser/MCAsmLexer.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,9 @@ class MCAsmLexer {
148148
void setSkipSpace(bool val) { SkipSpace = val; }
149149

150150
bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
151-
void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
151+
/// Update the AllowAtInIdentifier flag to \p v and return the value the flag
/// held before the call, so a caller can save and later restore it.
bool setAllowAtInIdentifier(bool v) {
152+
return std::exchange(AllowAtInIdentifier, v);
153+
}
152154

153155
void setAllowHashInIdentifier(bool V) { AllowHashInIdentifier = V; }
154156

llvm/include/llvm/MC/MCParser/MCAsmParser.h

+6-1
Original file line numberDiff line numberDiff line change
@@ -136,8 +136,10 @@ class MCAsmParser {
136136
MCTargetAsmParser *TargetParser = nullptr;
137137

138138
protected: // Can only create subclasses.
139-
MCAsmParser();
139+
MCAsmParser(MCContext &, const MCAsmInfo &);
140140

141+
MCContext &Ctx;
142+
const MCAsmInfo &MAI;
141143
SmallVector<MCPendingError, 0> PendingErrors;
142144

143145
/// Flag tracking whether any errors have been encountered.
@@ -333,6 +335,9 @@ class MCAsmParser {
333335

334336
/// Parse a .gnu_attribute.
335337
bool parseGNUAttribute(SMLoc L, int64_t &Tag, int64_t &IntegerValue);
338+
339+
bool parseAtSpecifier(const MCExpr *&Res, SMLoc &EndLoc);
340+
const MCExpr *applySpecifier(const MCExpr *E, uint32_t Variant);
336341
};
337342

338343
/// Create an MCAsmParser instance for parsing assembly similar to gas syntax

llvm/lib/MC/MCParser/AsmParser.cpp

+26-9
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "llvm/ADT/APInt.h"
1515
#include "llvm/ADT/ArrayRef.h"
1616
#include "llvm/ADT/STLExtras.h"
17+
#include "llvm/ADT/ScopeExit.h"
1718
#include "llvm/ADT/SmallSet.h"
1819
#include "llvm/ADT/SmallString.h"
1920
#include "llvm/ADT/SmallVector.h"
@@ -118,9 +119,7 @@ struct ParseStatementInfo {
118119
class AsmParser : public MCAsmParser {
119120
private:
120121
AsmLexer Lexer;
121-
MCContext &Ctx;
122122
MCStreamer &Out;
123-
const MCAsmInfo &MAI;
124123
SourceMgr &SrcMgr;
125124
SourceMgr::DiagHandlerTy SavedDiagHandler;
126125
void *SavedDiagContext;
@@ -680,8 +679,6 @@ class AsmParser : public MCAsmParser {
680679
bool parseEscapedString(std::string &Data) override;
681680
bool parseAngleBracketString(std::string &Data) override;
682681

683-
const MCExpr *applySpecifier(const MCExpr *E, uint32_t Variant);
684-
685682
// Macro-like directives
686683
MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc);
687684
void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
@@ -773,7 +770,7 @@ enum { DEFAULT_ADDRSPACE = 0 };
773770

774771
AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
775772
const MCAsmInfo &MAI, unsigned CB = 0)
776-
: Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM),
773+
: MCAsmParser(Ctx, MAI), Lexer(MAI), Out(Out), SrcMgr(SM),
777774
CurBuffer(CB ? CB : SM.getMainFileID()), MacrosEnabledFlag(true) {
778775
HadError = false;
779776
// Save the old handler.
@@ -1204,7 +1201,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
12041201

12051202
Split = std::make_pair(Identifier, VName);
12061203
}
1207-
} else {
1204+
} else if (Lexer.getAllowAtInIdentifier()) {
12081205
Split = Identifier.split('@');
12091206
}
12101207
} else if (MAI.useParensForSpecifier() &&
@@ -1352,7 +1349,7 @@ bool AsmParser::parseExpression(const MCExpr *&Res) {
13521349
return parseExpression(Res, EndLoc);
13531350
}
13541351

1355-
const MCExpr *AsmParser::applySpecifier(const MCExpr *E, uint32_t Spec) {
1352+
const MCExpr *MCAsmParser::applySpecifier(const MCExpr *E, uint32_t Spec) {
13561353
// Ask the target implementation about this expression first.
13571354
const MCExpr *NewE = getTargetParser().applySpecifier(E, Spec, Ctx);
13581355
if (NewE)
@@ -1443,6 +1440,27 @@ static std::string angleBracketString(StringRef AltMacroStr) {
14431440
return Res;
14441441
}
14451442

1443+
/// Parse an optional trailing '@specifier' and apply it to \p Res.
///
/// If the next token is not '@', nothing is consumed and \p Res is left
/// unchanged. Otherwise the token after '@' must be an identifier naming a
/// specifier known to MAI.getSpecifierForName(); the specifier is applied
/// through applySpecifier() and \p Res is replaced only when that call
/// yields a non-null expression.
///
/// \param Res    expression to decorate; updated in place on success.
/// \param EndLoc not written by this function body.
/// \returns false when no '@' follows or the specifier was consumed;
///          otherwise the result of TokError() for the diagnosed problem.
bool MCAsmParser::parseAtSpecifier(const MCExpr *&Res, SMLoc &EndLoc) {
1444+
// Force the lexer's AllowAtInIdentifier flag on for the duration of this
// call; the scope-exit guard below restores the saved value on every return
// path, including the error returns.
bool SavedAllowAt = getLexer().getAllowAtInIdentifier();
1445+
getLexer().setAllowAtInIdentifier(true);
1446+
auto _ = make_scope_exit(
1447+
[&]() { getLexer().setAllowAtInIdentifier(SavedAllowAt); });
1448+
if (parseOptionalToken(AsmToken::At)) {
1449+
if (getLexer().isNot(AsmToken::Identifier))
1450+
return TokError("expected specifier following '@'");
1451+
1452+
// Map the identifier text to a specifier value; an unknown name is an error.
auto Spec = MAI.getSpecifierForName(getTok().getIdentifier());
1453+
if (!Spec)
1454+
return TokError("invalid specifier '@" + getTok().getIdentifier() + "'");
1455+
1456+
// A null result from applySpecifier leaves Res as it was.
const MCExpr *ModifiedRes = applySpecifier(Res, *Spec);
1457+
if (ModifiedRes)
1458+
Res = ModifiedRes;
1459+
// Consume the specifier identifier only after it has been looked up and
// applied, since the lookups above read the current token.
Lex();
1460+
}
1461+
return false;
1462+
}
1463+
14461464
/// Parse an expression and return it.
14471465
///
14481466
/// expr ::= expr &&,|| expr -> lowest.
@@ -1463,8 +1481,7 @@ bool AsmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
14631481
// As a special case, we support 'a op b @ modifier' by rewriting the
14641482
// expression to include the modifier. This is inefficient, but in general we
14651483
// expect users to use 'a@modifier op b'.
1466-
if (Ctx.getAsmInfo()->useAtForSpecifier() &&
1467-
parseOptionalToken(AsmToken::At)) {
1484+
if (Lexer.getAllowAtInIdentifier() && parseOptionalToken(AsmToken::At)) {
14681485
if (Lexer.isNot(AsmToken::Identifier))
14691486
return TokError("unexpected symbol modifier following '@'");
14701487

llvm/lib/MC/MCParser/MCAsmParser.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ cl::opt<unsigned> AsmMacroMaxNestingDepth(
2727
cl::desc("The maximum nesting depth allowed for assembly macros."));
2828
}
2929

30-
MCAsmParser::MCAsmParser() = default;
30+
// Construct the base parser, binding the Ctx and MAI reference members to the
// MCContext and MCAsmInfo supplied by the derived parser.
MCAsmParser::MCAsmParser(MCContext &Ctx, const MCAsmInfo &MAI)
31+
: Ctx(Ctx), MAI(MAI) {}
3132

3233
MCAsmParser::~MCAsmParser() = default;
3334

llvm/lib/MC/MCParser/MasmParser.cpp

+1-3
Original file line numberDiff line numberDiff line change
@@ -376,9 +376,7 @@ FieldInitializer &FieldInitializer::operator=(FieldInitializer &&Initializer) {
376376
class MasmParser : public MCAsmParser {
377377
private:
378378
AsmLexer Lexer;
379-
MCContext &Ctx;
380379
MCStreamer &Out;
381-
const MCAsmInfo &MAI;
382380
SourceMgr &SrcMgr;
383381
SourceMgr::DiagHandlerTy SavedDiagHandler;
384382
void *SavedDiagContext;
@@ -973,7 +971,7 @@ enum { DEFAULT_ADDRSPACE = 0 };
973971

974972
MasmParser::MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
975973
const MCAsmInfo &MAI, struct tm TM, unsigned CB)
976-
: Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM),
974+
: MCAsmParser(Ctx, MAI), Lexer(MAI), Out(Out), SrcMgr(SM),
977975
CurBuffer(CB ? CB : SM.getMainFileID()), TM(TM) {
978976
HadError = false;
979977
// Save the old handler.

llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp

+64-54
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,15 @@
1818
#include "llvm/ADT/APInt.h"
1919
#include "llvm/ADT/ArrayRef.h"
2020
#include "llvm/ADT/STLExtras.h"
21+
#include "llvm/ADT/ScopeExit.h"
2122
#include "llvm/ADT/SmallSet.h"
2223
#include "llvm/ADT/SmallVector.h"
2324
#include "llvm/ADT/StringExtras.h"
2425
#include "llvm/ADT/StringMap.h"
2526
#include "llvm/ADT/StringRef.h"
2627
#include "llvm/ADT/StringSwitch.h"
2728
#include "llvm/ADT/Twine.h"
29+
#include "llvm/MC/MCAsmInfo.h"
2830
#include "llvm/MC/MCContext.h"
2931
#include "llvm/MC/MCExpr.h"
3032
#include "llvm/MC/MCInst.h"
@@ -180,6 +182,7 @@ class AArch64AsmParser : public MCTargetAsmParser {
180182
bool showMatchError(SMLoc Loc, unsigned ErrCode, uint64_t ErrorInfo,
181183
OperandVector &Operands);
182184

185+
bool parseDataExpr(const MCExpr *&Res) override;
183186
bool parseAuthExpr(const MCExpr *&Res, SMLoc &EndLoc);
184187

185188
bool parseDirectiveArch(SMLoc L);
@@ -335,8 +338,6 @@ class AArch64AsmParser : public MCTargetAsmParser {
335338
unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
336339
unsigned Kind) override;
337340

338-
bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
339-
340341
static bool classifySymbolRef(const MCExpr *Expr,
341342
AArch64MCExpr::Specifier &ELFSpec,
342343
MCSymbolRefExpr::VariantKind &DarwinRefKind,
@@ -4478,6 +4479,18 @@ bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
44784479
if (HasELFModifier)
44794480
ImmVal = AArch64MCExpr::create(ImmVal, RefKind, getContext());
44804481

4482+
SMLoc EndLoc;
4483+
if (getContext().getAsmInfo()->hasSubsectionsViaSymbols()) {
4484+
if (getParser().parseAtSpecifier(ImmVal, EndLoc))
4485+
return true;
4486+
const MCExpr *Term;
4487+
if (parseOptionalToken(AsmToken::Plus)) {
4488+
if (getParser().parseExpression(Term, EndLoc))
4489+
return true;
4490+
ImmVal = MCBinaryExpr::create(MCBinaryExpr::Add, ImmVal, Term, getContext());
4491+
}
4492+
}
4493+
44814494
return false;
44824495
}
44834496

@@ -5007,11 +5020,17 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
50075020

50085021
// This was not a register so parse other operands that start with an
50095022
// identifier (like labels) as expressions and create them as immediates.
5010-
const MCExpr *IdVal;
5023+
const MCExpr *IdVal, *Term;
50115024
S = getLoc();
50125025
if (getParser().parseExpression(IdVal))
50135026
return true;
5014-
E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
5027+
if (getParser().parseAtSpecifier(IdVal, E))
5028+
return true;
5029+
if (parseOptionalToken(AsmToken::Plus)) {
5030+
if (getParser().parseExpression(Term, E))
5031+
return true;
5032+
IdVal = MCBinaryExpr::create(MCBinaryExpr::Add, IdVal, Term, getContext());
5033+
}
50155034
Operands.push_back(AArch64Operand::CreateImm(IdVal, S, E, getContext()));
50165035

50175036
// Parse an optional shift/extend modifier.
@@ -8086,11 +8105,48 @@ bool AArch64AsmParser::parseDirectiveAeabiAArch64Attr(SMLoc L) {
80868105
return false;
80878106
}
80888107

8089-
bool AArch64AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
8090-
// Try @AUTH expressions: they're more complex than the usual symbol variants.
8091-
if (!parseAuthExpr(Res, EndLoc))
8108+
bool AArch64AsmParser::parseDataExpr(const MCExpr *&Res) {
8109+
SMLoc EndLoc;
8110+
8111+
if (getParser().parseExpression(Res))
8112+
return true;
8113+
MCAsmParser &Parser = getParser();
8114+
if (!parseOptionalToken(AsmToken::At))
80928115
return false;
8093-
return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
8116+
if (getLexer().getKind() != AsmToken::Identifier)
8117+
return Error(getLoc(), "expected relocation specifier");
8118+
8119+
std::string Identifier = Parser.getTok().getIdentifier().lower();
8120+
SMLoc Loc = getLoc();
8121+
Lex();
8122+
if (Identifier == "auth")
8123+
return parseAuthExpr(Res, EndLoc);
8124+
8125+
auto Spec = MCSymbolRefExpr::VK_PLT;
8126+
if (Identifier == "gotpcrel")
8127+
Spec = MCSymbolRefExpr::VK_GOTPCREL;
8128+
else if (Identifier != "plt")
8129+
return Error(Loc, "invalid relocation specifier");
8130+
if (auto *SRE = dyn_cast<MCSymbolRefExpr>(Res))
8131+
Res = MCSymbolRefExpr::create(&SRE->getSymbol(), Spec, getContext(),
8132+
SRE->getLoc());
8133+
else
8134+
return Error(Loc, "this relocation specifier must follow a symbol");
8135+
8136+
for (;;) {
8137+
std::optional<MCBinaryExpr::Opcode> Opcode;
8138+
if (parseOptionalToken(AsmToken::Plus))
8139+
Opcode = MCBinaryExpr::Add;
8140+
else if (parseOptionalToken(AsmToken::Minus))
8141+
Opcode = MCBinaryExpr::Sub;
8142+
else
8143+
break;
8144+
const MCExpr *Term;
8145+
if (getParser().parsePrimaryExpr(Term, EndLoc, nullptr))
8146+
return true;
8147+
Res = MCBinaryExpr::create(*Opcode, Res, Term, getContext());
8148+
}
8149+
return false;
80948150
}
80958151

80968152
/// parseAuthExpr
@@ -8100,54 +8156,8 @@ bool AArch64AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
81008156
bool AArch64AsmParser::parseAuthExpr(const MCExpr *&Res, SMLoc &EndLoc) {
81018157
MCAsmParser &Parser = getParser();
81028158
MCContext &Ctx = getContext();
8103-
81048159
AsmToken Tok = Parser.getTok();
81058160

8106-
// Look for '_sym@AUTH' ...
8107-
if (Tok.is(AsmToken::Identifier) && Tok.getIdentifier().ends_with("@AUTH")) {
8108-
StringRef SymName = Tok.getIdentifier().drop_back(strlen("@AUTH"));
8109-
if (SymName.contains('@'))
8110-
return TokError(
8111-
"combination of @AUTH with other modifiers not supported");
8112-
Res = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(SymName), Ctx);
8113-
8114-
Parser.Lex(); // Eat the identifier.
8115-
} else {
8116-
// ... or look for a more complex symbol reference, such as ...
8117-
SmallVector<AsmToken, 6> Tokens;
8118-
8119-
// ... '"_long sym"@AUTH' ...
8120-
if (Tok.is(AsmToken::String))
8121-
Tokens.resize(2);
8122-
// ... or '(_sym + 5)@AUTH'.
8123-
else if (Tok.is(AsmToken::LParen))
8124-
Tokens.resize(6);
8125-
else
8126-
return true;
8127-
8128-
if (Parser.getLexer().peekTokens(Tokens) != Tokens.size())
8129-
return true;
8130-
8131-
// In either case, the expression ends with '@' 'AUTH'.
8132-
if (Tokens[Tokens.size() - 2].isNot(AsmToken::At) ||
8133-
Tokens[Tokens.size() - 1].isNot(AsmToken::Identifier) ||
8134-
Tokens[Tokens.size() - 1].getIdentifier() != "AUTH")
8135-
return true;
8136-
8137-
if (Tok.is(AsmToken::String)) {
8138-
StringRef SymName;
8139-
if (Parser.parseIdentifier(SymName))
8140-
return true;
8141-
Res = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(SymName), Ctx);
8142-
} else {
8143-
if (Parser.parsePrimaryExpr(Res, EndLoc, nullptr))
8144-
return true;
8145-
}
8146-
8147-
Parser.Lex(); // '@'
8148-
Parser.Lex(); // 'AUTH'
8149-
}
8150-
81518161
// At this point, we encountered "<id>@AUTH". There is no fallback anymore.
81528162
if (parseToken(AsmToken::LParen, "expected '('"))
81538163
return true;

llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin(bool IsILP32) {
6161
UsesELFSectionDirectiveForBSS = true;
6262
SupportsDebugInformation = true;
6363
UseDataRegionDirectives = true;
64+
UseAtForSpecifier = false;
6465

6566
ExceptionsType = ExceptionHandling::DwarfCFI;
6667

@@ -105,6 +106,7 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(const Triple &T) {
105106
Data64bitsDirective = "\t.xword\t";
106107

107108
UseDataRegionDirectives = false;
109+
UseAtForSpecifier = false;
108110

109111
WeakRefDirective = "\t.weak\t";
110112

llvm/test/MC/AArch64/data-directive-specifier.s

+13-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# RUN: llvm-mc -triple=aarch64 -filetype=obj %s | llvm-readobj -r - | FileCheck %s
2-
# RUN: not llvm-mc -triple=aarch64 -filetype=obj %s --defsym ERR=1 -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error:
2+
# RUN: not llvm-mc -triple=aarch64 %s --defsym ERR=1 -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error:
3+
# RUN: not llvm-mc -triple=aarch64 -filetype=obj %s --defsym OBJERR=1 -o /dev/null 2>&1 | FileCheck %s --check-prefix=OBJERR --implicit-check-not=error:
34

45
.globl g
56
g:
@@ -32,13 +33,21 @@ data1:
3233
.word extern@GOTPCREL-5
3334

3435
.ifdef ERR
35-
# ERR: [[#@LINE+1]]:7: error: symbol 'und' can not be undefined in a subtraction expression
36-
.word extern@plt - und
36+
# ERR: [[#@LINE+1]]:9: error: this relocation specifier must follow a symbol
37+
.quad 3@plt - .
38+
39+
# ERR: [[#@LINE+1]]:9: error: expected ')'
40+
.quad (l@plt - .)
41+
.endif
3742

43+
.ifdef OBJERR
3844
.quad g@plt - .
3945

4046
.word extern@gotpcrel - .
4147

42-
# ERR: [[#@LINE+1]]:7: error: symbol 'und' can not be undefined in a subtraction expression
48+
# OBJERR: [[#@LINE+1]]:7: error: symbol 'und' can not be undefined in a subtraction expression
49+
.word extern@plt - und
50+
51+
# OBJERR: [[#@LINE+1]]:7: error: symbol 'und' can not be undefined in a subtraction expression
4352
.word extern@gotpcrel - und
4453
.endif

0 commit comments

Comments
 (0)