From f296669c0f23af63716050593865475471054941 Mon Sep 17 00:00:00 2001 From: Adela Vais Date: Sat, 26 Sep 2020 07:12:42 +0200 Subject: [PATCH] d: change the return value of yylex from int to TokenKind * data/skeletons/lalr1.d: Change the return value. * examples/d/calc/calc.y, examples/d/simple/calc.y: Adjust. * tests/scanner.at: Adjust. * tests/calc.at (_AT_DATA_CALC_Y(d)): New, extracted from... (_AT_DATA_CALC_Y(c)): here. The two grammars have been sufficiently different to be separated. Still trying to be them together results in a maintenance burden. For the same reason, instead of specifying the results for D and for the rest, compute the expected results with D from the regular case. --- TODO | 15 ++--- data/skeletons/lalr1.d | 4 +- examples/d/calc/calc.y | 2 +- examples/d/simple/calc.y | 2 +- tests/calc.at | 137 ++++++++++++++++++++++++++++++++------- tests/scanner.at | 2 +- 6 files changed, 124 insertions(+), 38 deletions(-) diff --git a/TODO b/TODO index c692b9f95..8cb1a3099 100644 --- a/TODO +++ b/TODO @@ -249,21 +249,14 @@ are. Keep the same variable names. If you change the wording in one place, do it in the others too. In other words: make sure to keep the maintenance *simple* by avoiding any gratuitous difference. -** Change the return value of yylex -Historically people were allowed to return any int from the scanner (which -is convenient and allows `return '+'` from the scanner). Akim tends to see -this as an error, we should restrict the return values to TokenKind (not to -be confused with SymbolKind). - -In the case of D, without the history, we have the choice to support or not -`int`. If we want to _keep_ `int`, is there a way, say via introspection, -to support both signatures of yylex? If we don't keep `int`, just move to -TokenKind. - ** Documentation Write documentation about D support in doc/bison.texi. Imitate the Java documentation. You should be more succinct IMHO. +** yyerrok +It appears that neither Java nor D support yyerrok currently. It does not +need to be named this way... + ** Complete Symbols The current interface from the scanner to the parser is somewhat clumsy: the token kind is returned by yylex, but the value and location are stored in diff --git a/data/skeletons/lalr1.d b/data/skeletons/lalr1.d index e879dabf1..6d1bdfddd 100644 --- a/data/skeletons/lalr1.d +++ b/data/skeletons/lalr1.d @@ -68,7 +68,7 @@ public interface Lexer * to the next token and prepares to return the semantic value * ]b4_locations_if([and beginning/ending positions ])[of the token. * @@return the token identifier corresponding to the next token. */ - int yylex (); + TokenKind yylex (); /** * Entry point for error reporting. Emits an error @@ -272,7 +272,7 @@ b4_user_union_members yyDebugStream.writeln (s); } ]])[ - private final int yylex () { + private final TokenKind yylex () { return yylexer.yylex (); } diff --git a/examples/d/calc/calc.y b/examples/d/calc/calc.y index 2ad1227af..9fea82cd0 100644 --- a/examples/d/calc/calc.y +++ b/examples/d/calc/calc.y @@ -114,7 +114,7 @@ class CalcLexer(R) : Lexer return semanticVal_; } - int yylex () + TokenKind yylex () { import std.uni : isWhite, isNumber; diff --git a/examples/d/simple/calc.y b/examples/d/simple/calc.y index 917eb131a..0f4414315 100644 --- a/examples/d/simple/calc.y +++ b/examples/d/simple/calc.y @@ -109,7 +109,7 @@ class CalcLexer(R) : Lexer return semanticVal_; } - int yylex () + TokenKind yylex () { import std.uni : isWhite, isNumber; diff --git a/tests/calc.at b/tests/calc.at index f10335173..e017e5e47 100644 --- a/tests/calc.at +++ b/tests/calc.at @@ -299,7 +299,7 @@ class CalcLexer(R) : Lexer return res; } - int yylex () + TokenKind yylex () {]AT_LOCATION_IF([[ location.begin = location.end;]])[ @@ -342,7 +342,20 @@ class CalcLexer(R) : Lexer return TokenKind.YYerror; } - return c; + switch (c) + { + case '+': return TokenKind.PLUS; + case '-': return TokenKind.MINUS; + case '*': return TokenKind.STAR; + case '/': return TokenKind.SLASH; + case '(': return TokenKind.LPAR; + case ')': return TokenKind.RPAR; + case '\n': return TokenKind.EOL; + case '=': return TokenKind.EQUAL; + case '^': return TokenKind.POW; + case '!': return TokenKind.NOT; + default: return TokenKind.YYUNDEF; + } } } ]]) @@ -444,13 +457,6 @@ m4_define([_AT_DATA_CALC_Y(c)], [AT_DATA_GRAMMAR([calc.y], [[/* Infix notation calculator--calc */ ]$4[ -]AT_LANG_MATCH( -[d], [[ -%code imports { - alias semantic_value = int; -} -]], -[c\|c++], [[ %code requires { ]AT_LOCATION_TYPE_SPAN_IF([[ @@ -489,7 +495,6 @@ void location_print (FILE *o, Span s); /* Exercise pre-prologue dependency to %union. */ typedef int semantic_value; } -]])[ /* Exercise %union. */ %union @@ -592,9 +597,7 @@ exp: char buf[1024]; snprintf (buf, sizeof buf, "error: %d != %d", $1, $3); ]AT_GLR_IF([[yyparser.]])[error (]AT_LOCATION_IF([[@$, ]])[buf); - }]], - [d], [[ - yyerror (]AT_LOCATION_IF([[@$, ]])[format ("error: %d != %d", $1, $3));]])[ + }]])[ $$ = $1; } | exp '+' exp { $$ = $1 + $3; } @@ -617,18 +620,16 @@ exp: [c++], [[ { ]AT_GLR_IF([[yyparser.]])[error (]AT_LOCATION_IF([[@3, ]])["error: null divisor"); - }]], - [d], [[ - yyerror (]AT_LOCATION_IF([[@3, ]])["error: null divisor");]])[ + }]])[ else $$ = $1 / $3; } | '-' exp %prec NEG { $$ = -$2; } | exp '^' exp { $$ = power ($1, $3); } | '(' exp ')' { $$ = $2; } -| '(' error ')' { $$ = 1111; ]AT_D_IF([], [yyerrok;])[ } -| '!' { $$ = 0; ]AT_D_IF([return YYERROR], [YYERROR])[; } -| '-' error { $$ = 0; ]AT_D_IF([return YYERROR], [YYERROR])[; } +| '(' error ')' { $$ = 1111; yyerrok; } +| '!' { $$ = 0; YYERROR; } +| '-' error { $$ = 0; YYERROR; } ; %% @@ -682,11 +683,100 @@ AT_DATA_SOURCE([[calc-main.]AT_LANG_EXT], ]AT_CALC_MAIN]) ]) -])# _AT_DATA_CALC_Y +])# _AT_DATA_CALC_Y(c) m4_copy([_AT_DATA_CALC_Y(c)], [_AT_DATA_CALC_Y(c++)]) -m4_copy([_AT_DATA_CALC_Y(c)], [_AT_DATA_CALC_Y(d)]) + +m4_define([_AT_DATA_CALC_Y(d)], +[AT_DATA_GRAMMAR([calc.y], +[[/* Infix notation calculator--calc */ +]$4[ +%code imports { + alias semantic_value = int; +} +/* Exercise %union. */ +%union +{ + semantic_value ival; +}; +%printer { fprintf (yyo, "%d", $$); } ; + +/* Bison Declarations */ +%token CALC_EOF 0 ]AT_TOKEN_TRANSLATE_IF([_("end of input")], ["end of input"])[ +%token NUM "number" +%type exp + +%token PLUS "+" + MINUS "-" + STAR "*" + SLASH "/" + LPAR "(" + RPAR ")" + EQUAL "=" + POW "^" + NOT "!" + EOL "\n" + +%nonassoc "=" /* comparison */ +%left "-" "+" +%left "*" "/" +%precedence NEG /* negation--unary minus */ +%right "^" /* exponentiation */ + +/* Grammar follows */ +%% +input: + line +| input line { ]AT_PARAM_IF([++*count; ++global_count;])[ } +; + +line: + EOL +| exp EOL { ]AT_PARAM_IF([*result = global_result = $1;], [AT_D_IF([], [USE ($1);])])[ } +; + +exp: + NUM +| exp "=" exp + { + if ($1 != $3) + yyerror (]AT_LOCATION_IF([[@$, ]])[format ("error: %d != %d", $1, $3)); + $$ = $1; + } +| exp "+" exp { $$ = $1 + $3; } +| exp "-" exp { $$ = $1 - $3; } +| exp "*" exp { $$ = $1 * $3; } +| exp "/" exp + { + if ($3 == 0) + yyerror (]AT_LOCATION_IF([[@3, ]])["error: null divisor"); + else + $$ = $1 / $3; + } +| "-" exp %prec NEG { $$ = -$2; } +| exp "^" exp { $$ = power ($1, $3); } +| "(" exp ")" { $$ = $2; } +| "(" error ")" { $$ = 1111; ]AT_D_IF([], [yyerrok;])[ } +| "!" { $$ = 0; return YYERROR; } +| "-" error { $$ = 0; return YYERROR; } +; +%% + +int +power (int base, int exponent) +{ + int res = 1; + assert (0 <= exponent); + for (/* Niente */; exponent; --exponent) + res *= base; + return res; +} + +]AT_YYERROR_DEFINE[ +]AT_CALC_YYLEX +AT_CALC_MAIN]) +])# _AT_DATA_CALC_Y(d) m4_define([_AT_DATA_CALC_Y(java)], [AT_DATA_GRAMMAR([Calc.y], @@ -883,7 +973,10 @@ AT_PERL_REQUIRE([[-pi -e 'use strict; s{syntax error on token \[(.*?)\] \(expected: (.*)\)} { my $unexp = $][1; - my @exps = $][2 =~ /\[(.*?)\]/g; + my @exps = $][2 =~ /\[(.*?)\]/g;]AT_D_IF([[ + # In the case of D, there are no single quotes around the symbols. + $unexp =~ s/'"'(.)'"'/$][1/g; + s/'"'(.)'"'/$][1/g for @exps;]])[ ($][#exps && $][#exps < 4) ? "syntax error, unexpected $unexp, expecting @{[join(\" or \", @exps)]}" : "syntax error, unexpected $unexp"; diff --git a/tests/scanner.at b/tests/scanner.at index 5ad187296..2ec2cd789 100644 --- a/tests/scanner.at +++ b/tests/scanner.at @@ -121,7 +121,7 @@ class YYLexer(R) : Lexer return semanticVal_; } - int yylex () + TokenKind yylex () { import std.uni : isNumber; // Handle EOF.