Recursive descent parser example for C

Jack picture Jack · Apr 21, 2013 · Viewed 17.7k times · Source

I'm trying to learn about parsing expressions.I found recursive descent parse seems easy to do this. From wikipedia,I found an example in C. So,I start reading and editing this code to understand how it works. I written the missing routines according to descriptions on wikipedia's page,but it doesn't work from any expression as I expected. For example: 1+2*3+1; returns

error: statement: syntax error

Can someone explain what am I missing?

The current C code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef enum {ident, number, lparen, rparen, 
          times, // symbol *
          slash, // symbol \ not added yet
          plus, // symbol +
          minus, //symbol -
          eql, //symbol ==
          neq, // !=
          lss, // <
          leq,// <=
          gtr, // >
          geq, // >= 
          callsym, // not added yet 
          beginsym, // not added yet 
          semicolon, // ;
          endsym, 
          ifsym, whilesym, becomes, thensym, dosym, constsym, 
          comma, //:
          varsym, procsym, period, oddsym, 
          not, // !
          eq // =
} Symbol;

Symbol sym;
int peek;
void getsym(void);
void error(const char*);
void expression(void);
void program(void);
void nexttok(void);

#define is_num(c) ((c) >= '0' && (c) <= '9')
#define is_letter(c) ((c) <= 'a' && (c) <= 'z' || (c) >= 'A' && (c) <= 'Z')

int main(void)
{
  program();
  return 0;
}

void nexttok(void)
{
  peek = getchar();
}

void getsym(void)
{
  for(;;) {
    nexttok();
    if(peek == ' ' || peek == '\t') continue;
    else if(peek == EOF) break;
    else break;
  }
  //static char buf[256];
  switch(peek) {
  case '+': sym = plus; return;
  case '-': sym = minus; return;
  case '*': sym = times; return;
  case ';': sym = semicolon; return;
  case ':': sym = comma; return;
  case '=': 
    nexttok();
    if(peek == '=')
      sym = eql;
    else
      sym = eq;
    return;
  case '!': 
    nexttok();
    if(peek == '=')
      sym = neq;
    else
      sym = not;
    return;
  case '<':
    nexttok();
    if(peek == '=')
      sym = leq;
    else
      sym = lss;
    return;
  case '>':
    nexttok();
    if(peek == '=')
      sym = geq;
    else
      sym = gtr;
    return;
  }
  if(is_num(peek)) {
    sym = number;
    return;
  }
}

int accept(Symbol s) {
    if (sym == s) {
        getsym();
        return 1;
    }
    return 0;
}

int expect(Symbol s) {
    if (accept(s))
        return 1;
    error("expect: unexpected symbol");
    return 0;
}

void factor(void) {
    if (accept(ident)) {
        ;
    } else if (accept(number)) {
        ;
    } else if (accept(lparen)) {
        expression();
        expect(rparen);
    } else {
        error("factor: syntax error");
        getsym();
    }
}

void term(void) {
    factor();
    while (sym == times || sym == slash) {
        getsym();
        factor();
    }
}

void expression(void) {
    if (sym == plus || sym == minus)
        getsym();
    term();
    while (sym == plus || sym == minus) {
        getsym();
        term();
    }
}

void condition(void) {
    if (accept(oddsym)) {
        expression();
    } else {
        expression();
        if (sym == eql || sym == neq || sym == lss || sym == leq || sym == gtr || sym == geq) {
            getsym();
            expression();
        } else {
            error("condition: invalid operator");
            getsym();
        }
    }
}

void statement(void) {
    if (accept(ident)) {
        expect(becomes);
        expression();
    } else if (accept(callsym)) {
        expect(ident);
    } else if (accept(beginsym)) {
        do {
            statement();
        } while (accept(semicolon));
        expect(endsym);
    } else if (accept(ifsym)) {
        condition();
        expect(thensym);
        statement();
    } else if (accept(whilesym)) {
        condition();
        expect(dosym);
        statement();
    } else {
        error("statement: syntax error");
        getsym();
    }
}

void block(void) {
    if (accept(constsym)) {
        do {
            expect(ident);
            expect(eql);
            expect(number);
        } while (accept(comma));
        expect(semicolon);
    }
    if (accept(varsym)) {
        do {
            expect(ident);
        } while (accept(comma));
        expect(semicolon);
    }
    while (accept(procsym)) {
        expect(ident);
        expect(semicolon);
        block();
        expect(semicolon);
    }
    statement();
}

void program(void) {
    getsym();
    block();
    expect(period);
}

void error(const char *msg)
{
  fprintf(stderr,"error: %s\n",msg);
  exit(1);
}

Answer

icktoofay picture icktoofay · Apr 21, 2013

statement never calls expression, so all expressions are going to be syntax errors. To fix this, you'd need to change statement so it will call expression if sym is a valid symbol to start an expression. (accept would be unacceptable because it would consume the symbol and expression would not see it.)