Source Code (Use browser search to find items of interest.)

Class Index

kdelibs'Lexer (./kdelibs/kjs/lexer.h:30)

  /**
   * Hand-written scanner that turns a buffer of UChar source characters
   * into tokens. Source is attached with setCode(); each call to lex()
   * returns the code of the next token.
   */
  class Lexer {
  public:
    Lexer();
    ~Lexer();
    /** Returns the lexer stored in the current KJScript instance. */
    static Lexer *curr();
    /** Stores @p l as the lexer of the current KJScript instance. */
    static void setCurrent(Lexer *l);

    /** Attaches @p len characters starting at @p c and rewinds the scanner. */
    void setCode(const UChar *c, unsigned int len);
    /** Scans and returns the next token; 0 at end of input, -1 on error. */
    int lex();

    /** Line number of the scan position, counted from 1. */
    int lineNo() const { return yylineno + 1; }

    /** True if the last lex() call passed a line terminator before its token. */
    bool prevTerminator() const { return terminator; }

    /** Scanner states used by lex(); setDone() stores the final one. */
    enum State { Start,
		 Identifier,
		 InIdentifier,
		 InSingleLineComment,
		 InMultiLineComment,
		 InNum,
		 InNum0,
		 InHex,
		 InOctal,
		 InDecimal,
		 InExponentIndicator,
		 InExponent,
		 Int,
		 Hex,
		 Octal,
		 Decimal,
		 String,
		 Eof,
		 InString,
		 InEscapeSequence,
		 InHexEscape,
		 InUnicodeEscape,
		 Other,
		 Bad };
    
    /**
     * Scans a regular expression body up to the closing '/' followed by
     * its flag letters; results are left in pattern and flags.
     * Returns false if a line terminator or the end of input comes first.
     */
    bool scanRegExp();
    UString pattern, flags;

  private:
    int yylineno;                 // zero-based line counter, see lineNo()
    bool done;                    // loop flag of lex(), raised by setDone()
    char *buffer8;                // text of numeric literals (record8())
    UChar *buffer16;              // text of identifiers, strings, regexps (record16())
    unsigned int size8, size16;   // allocated capacity of the two buffers
    unsigned int pos8, pos16;     // number of characters recorded so far
    bool terminator;              // see prevTerminator()
    bool restrKeyword;            // last token was continue/break/return/throw
    int stackToken;               // token held back for the next lex() call

    State state;
    /** Stores the final state @p s and ends the scanning loop of lex(). */
    void setDone(State s);
    unsigned int pos;             // index of `current` within `code`
    /** Advances the scan position by @p p characters. */
    void shift(unsigned int p);
    // no definition of lookupKeyword() is visible in this part of the source
    int lookupKeyword(const char *);

    // character classification; the argument-less ones test `current`
    bool isWhiteSpace() const;
    bool isLineTerminator() const;
    bool isIdentLetter() const;
    bool isDecimalDigit(unsigned short c) const;
    bool isHexDigit(unsigned short c) const;
    bool isOctalDigit(unsigned short c) const;

    /**
     * Matches the longest punctuator starting with c1..c4, consumes it
     * and returns its token, or returns -1 without consuming anything.
     */
    int matchPunctuator(unsigned short c1, unsigned short c2,
			unsigned short c3, unsigned short c4);
    // helpers converting the digits/letter of an escape sequence to its value
    unsigned char singleEscape(unsigned short c) const;
    unsigned short convertOctal(unsigned short c1, unsigned short c2,
                                unsigned short c3) const;
    unsigned char convertHex(unsigned short c1) const;
    unsigned char convertHex(unsigned short c1, unsigned short c2) const;
    UChar convertUnicode(unsigned short c1, unsigned short c2,
                               unsigned short c3, unsigned short c4) const;

    // append one character to buffer8 / buffer16, growing the buffer if full.
    // NOTE(review): a call such as record16(current) with an unsigned short
    // argument selects the unsigned char overload (a standard conversion
    // beats a user-defined one), so code points above 0xFF are truncated.
    void record8(unsigned short c);
    void record16(unsigned char c);
    void record16(UChar c);

    const UChar *code;            // source text set by setCode(), not owned
    unsigned int length;          // number of characters in `code`
    int yycolumn;                 // not referenced by the code visible here

    // current and following unicode characters
    unsigned short current, next1, next2, next3;

    struct keyword {
      const char *name;
      int token;
    };

    // for future extensions
    class LexerPrivate;
    LexerPrivate *priv;
  };

}; // namespace

kdelibs'Lexer::Lexer() (./kdelibs/kjs/lexer.cpp:48)

/**
 * Creates a lexer with no source attached.
 *
 * Every data member is given a defined value here. In particular
 * restrKeyword is read by lex() before lex() ever writes it, so leaving it
 * uninitialised could make the very first newline of a script produce a
 * spurious ';' token. Initialisers are listed in declaration order.
 */
Lexer::Lexer()
  : yylineno(0),
    done(false),
    buffer8(0), buffer16(0),
    size8(128), size16(128),
    pos8(0), pos16(0),
    terminator(false), restrKeyword(false),
    stackToken(0),
    state(Start),
    pos(0),
    code(0), length(0),
    yycolumn(0),
    current(0), next1(0), next2(0), next3(0),
    priv(0)
{
  // allocate space for read buffers (done in the body because the buffer
  // pointers are declared before the sizes they depend on)
  buffer8 = new char[size8];
  buffer16 = new UChar[size16];
}


kdelibs'Lexer::~Lexer() (./kdelibs/kjs/lexer.cpp:61)

/** Releases the two character buffers allocated by the constructor. */
Lexer::~Lexer()
{
  // the buffers are independent, so the order of release does not matter
  delete [] buffer16;
  delete [] buffer8;
}


kdelibs'Lexer::curr() (./kdelibs/kjs/lexer.cpp:67)

/**
 * Returns the `lex` member of the object returned by KJScript::current().
 * NOTE(review): the result of KJScript::current() is dereferenced without
 * a check - presumably it is never null when this is called; confirm.
 */
Lexer *Lexer::curr()
{
  return KJScript::current()->lex;
}


kdelibs'Lexer::setCurrent() (./kdelibs/kjs/lexer.cpp:72)

/**
 * Stores @p l in the `lex` member of the object returned by
 * KJScript::current(). Ownership of @p l is not visible from this code.
 */
void Lexer::setCurrent(Lexer *l)
{
  KJScript::current()->lex = l;
}


kdelibs'Lexer::setCode() (./kdelibs/kjs/lexer.cpp:77)

void Lexer::setCode(const UChar *c, unsigned int len)
{
  yylineno = 0;
  stackToken = 0;
  pos = 0;
  code = c;
  length = len;

  // read first characters
  current = (length > 0) ? code[0].unicode() : 0;
  next1 = (length > 1) ? code[1].unicode() : 0;
  next2 = (length > 2) ? code[2].unicode() : 0;
  next3 = (length > 3) ? code[3].unicode() : 0;
}


kdelibs'Lexer::shift() (./kdelibs/kjs/lexer.cpp:92)

/**
 * Advances the scan position by @p p characters, sliding the four-character
 * window (current, next1, next2, next3) along the source. Positions beyond
 * the end of the source are filled with 0.
 */
void Lexer::shift(unsigned int p)
{
  for (unsigned int remaining = p; remaining != 0; --remaining) {
    ++pos;
    current = next1;
    next1 = next2;
    next2 = next3;
    // refill the far end of the window
    next3 = (pos + 3 < length) ? code[pos + 3].unicode() : 0;
  }
}


kdelibs'Lexer::setDone() (./kdelibs/kjs/lexer.cpp:103)

/** Ends the scanning loop of lex() and records @p s as the final state. */
void Lexer::setDone(State s)
{
  done = true;
  state = s;
}


kdelibs'Lexer::lex() (./kdelibs/kjs/lexer.cpp:109)

/**
 * Scans the next token from the attached source.
 *
 * The function is a state machine over the current character: it starts in
 * Start, moves through the In* states while a token is being collected and
 * ends when setDone() stores a final state. Numeric text is collected in
 * buffer8, identifier and string text in buffer16.
 *
 * @return 0 at the end of input, -1 on a lexical error, otherwise the token
 *         code. For IDENT and STRING kjsyylval.ustr is set to a newly
 *         allocated UString, for INTEGER kjsyylval.ival and for DOUBLE
 *         kjsyylval.dval.
 *
 * Automatic semicolon insertion: a line terminator directly after
 * continue/break/return/throw yields ';'. That terminator is deliberately
 * not consumed, so the following call sees it again and sets `terminator`
 * for the next token. The line is therefore counted only on that second
 * visit, otherwise every such newline would be counted twice.
 */
int Lexer::lex()
{
  int token = 0;
  state = Start;
  unsigned short stringType = 0; // either single or double quotes
  pos8 = pos16 = 0;
  done = false;
  terminator = false;

  // did we push a token on the stack previously ?
  // (after an automatic semicolon insertion)
  if (stackToken) {
    setDone(Other);
    token = stackToken;
    stackToken = 0;
  }

  while (!done) {
    switch (state) {
    case Start:
      if (isWhiteSpace()) {
        // do nothing
      } else if (current == '/' && next1 == '/') {
        shift(1);
        state = InSingleLineComment;
        // <!-- marks the beginning of a line comment (for www usage)
      } else if (current == '<' && next1 == '!' && next2 == '-' && next3 == '-') {
        shift(3);
        state = InSingleLineComment;
      } else if (current == '/' && next1 == '*') {
        shift(1);
        state = InMultiLineComment;
      } else if (current == 0) {
        setDone(Eof);
      } else if (isLineTerminator()) {
        terminator = true;
        if (restrKeyword) {
          // terminator stays unconsumed and is counted by the next call
          token = ';';
          setDone(Other);
        } else {
          yylineno++;
        }
      } else if (current == '"' || current == '\'') {
        state = InString;
        stringType = current;
      } else if (isIdentLetter()) {
        record16(current);
        state = InIdentifier;
      } else if (current == '0') {
        record8(current);
        state = InNum0;
      } else if (isDecimalDigit(current)) {
        record8(current);
        state = InNum;
      } else if (current == '.' && isDecimalDigit(next1)) {
        record8(current);
        state = InDecimal;
      } else {
        // matchPunctuator() consumes the characters it matched
        token = matchPunctuator(current, next1, next2, next3);
        if (token != -1) {
          setDone(Other);
        } else {
          //	  cerr << "encountered unknown character" << endl;
          setDone(Bad);
        }
      }
      break;
    case InString:
      if (current == stringType) {
        shift(1);
        setDone(String);
      } else if (current == 0 || isLineTerminator()) {
        // unterminated string literal
        setDone(Bad);
      } else if (current == '\\') {
        state = InEscapeSequence;
      } else {
        // NOTE(review): `current` is an unsigned short, so this call binds
        // to record16(unsigned char) and characters above 0xFF lose their
        // high byte. Fixing it needs a change to the record16 overloads.
        record16(current);
      }
      break;
    // Escape Sequences inside of strings
    case InEscapeSequence:
      if (isOctalDigit(current)) {
        // up to three octal digits; three only if the value fits in a byte
        if (current <= '3' && isOctalDigit(next1) && isOctalDigit(next2)) {
          record16(convertOctal(current, next1, next2));
          shift(2);
        } else if (isOctalDigit(next1)) {
          record16(convertOctal('0', current, next1));
          shift(1);
        } else {
          record16(convertOctal('0', '0', current));
        }
        state = InString;
      } else if (current == 'x') {
        state = InHexEscape;
      } else if (current == 'u') {
        state = InUnicodeEscape;
      } else {
        record16(singleEscape(current));
        state = InString;
      }
      break;
    case InHexEscape:
      if (isHexDigit(current) && isHexDigit(next1)) {
        state = InString;
        record16(convertHex(current, next1));
        shift(1);
      } else {
        setDone(Bad);
      }
      break;
    case InUnicodeEscape:
      if (isHexDigit(current) && isHexDigit(next1) &&
          isHexDigit(next2) && isHexDigit(next3)) {
        record16(convertUnicode(current, next1, next2, next3));
        shift(3);
        state = InString;
      } else {
        setDone(Bad);
      }
      break;
    case InSingleLineComment:
      if (isLineTerminator()) {
        // the terminator ending a comment counts like any other one:
        // it is remembered for prevTerminator() and triggers semicolon
        // insertion after a restricted keyword
        terminator = true;
        if (restrKeyword) {
          // terminator stays unconsumed and is counted by the next call
          token = ';';
          setDone(Other);
        } else {
          yylineno++;
          state = Start;
        }
      } else if (current == 0) {
        setDone(Eof);
      }
      break;
    case InMultiLineComment:
      if (current == 0) {
        // unterminated comment
        setDone(Bad);
      } else if (isLineTerminator()) {
        yylineno++;
      } else if (current == '*' && next1 == '/') {
        state = Start;
        shift(1);
      }
      break;
    case InIdentifier:
      if (isIdentLetter() || isDecimalDigit(current)) {
        record16(current);
        break;
      }
      setDone(Identifier);
      break;
    case InNum0:
      if (current == 'x' || current == 'X') {
        record8(current);
        state = InHex;
      } else if (current == '.') {
        record8(current);
        state = InDecimal;
      } else if (current == 'e' || current == 'E') {
        record8(current);
        state = InExponentIndicator;
      } else if (isOctalDigit(current)) {
        record8(current);
        state = InOctal;
      } else {
        setDone(Int);
      }
      break;
    case InHex:
      if (isHexDigit(current)) {
        record8(current);
      } else {
        setDone(Hex);
      }
      break;
    case InOctal:
      if (isOctalDigit(current)) {
        record8(current);
      } else {
        setDone(Octal);
      }
      break;
    case InNum:
      if (isDecimalDigit(current)) {
        record8(current);
      } else if (current == '.') {
        record8(current);
        state = InDecimal;
      } else if (current == 'e' || current == 'E') {
        record8(current);
        state = InExponentIndicator;
      } else {
        setDone(Int);
      }
      break;
    case InDecimal:
      if (isDecimalDigit(current)) {
        record8(current);
      } else if (current == 'e' || current == 'E') {
        record8(current);
        state = InExponentIndicator;
      } else {
        setDone(Decimal);
      }
      break;
    case InExponentIndicator:
      // at most one sign, and it must be followed by a digit; this rejects
      // input such as "1e+-5" or "1e+" instead of silently accepting it
      if ((current == '+' || current == '-') && isDecimalDigit(next1)) {
        record8(current);
      } else if (isDecimalDigit(current)) {
        record8(current);
        state = InExponent;
      } else {
        setDone(Bad);
      }
      break;
    case InExponent:
      if (isDecimalDigit(current)) {
        record8(current);
      } else {
        setDone(Decimal);
      }
      break;
    default:
      assert(!"Unhandled state in switch statement");
      break;
    }

    // move on to the next character
    if (!done) {
      shift(1);
    }
  }

  // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
  if ((state == Int || state == Decimal || state == Octal || state == Hex)
      && isIdentLetter())
    state = Bad;

  // terminate string
  buffer8[pos8] = '\0';

#ifdef KJS_DEBUG_LEX
  fprintf(stderr, "line: %d ", lineNo());
  fprintf(stderr, "yytext (%x): ", buffer8[0]);
  fprintf(stderr, "%s ", buffer8);
#endif

  int i = 0;
  // scan integer and hex numbers ("%i" understands the 0x prefix)
  if (state == Int || state == Hex) {
    sscanf(buffer8, "%i", &i);
    state = Int;
  }
  // scan octal number
  if (state == Octal) {
    unsigned int ui = 0;
    sscanf(buffer8, "%o", &ui);
    i = ui;
    state = Int;
  }

#ifdef KJS_DEBUG_LEX
  switch (state) {
  case Eof:
    printf("(EOF)\n");
    break;
  case Other:
    printf("(Other)\n");
    break;
  case Identifier:
    printf("(Identifier)/(Keyword)\n");
    break;
  case String:
    printf("(String)\n");
    break;
  case Int:
    printf("(Int)\n");
    break;
  case Decimal:
    printf("(Decimal)\n");
    break;
  default:
    printf("(unknown)");
  }
#endif

  // the flag only ever applies to the token directly after the keyword
  restrKeyword = false;

  switch (state) {
  case Eof:
    return 0;
  case Other:
    return token;
  case Identifier:
    if ((token = Lookup::find(&mainTable, buffer16, pos16)) < 0) {
      // not a keyword
      kjsyylval.ustr = new UString(buffer16, pos16);
      return IDENT;
    }
    if (token == CONTINUE || token == BREAK ||
        token == RETURN || token == THROW)
      restrKeyword = true;
    return token;
  case String:
    kjsyylval.ustr = new UString(buffer16, pos16);
    return STRING;
  case Int:
    kjsyylval.ival = i;
    return INTEGER;
  case Decimal:
    kjsyylval.dval = strtod(buffer8, 0L);
    return DOUBLE;
  case Bad:
    fprintf(stderr, "yylex: ERROR.\n");
    return -1;
  default:
    assert(!"unhandled numeration value in switch");
    return -1;
  }
}


kdelibs'Lexer::isWhiteSpace() (./kdelibs/kjs/lexer.cpp:418)

/**
 * Tells whether the current character is a space, a horizontal tab,
 * a vertical tab (0x0b) or a form feed (0x0c).
 */
bool Lexer::isWhiteSpace() const
{
  switch (current) {
  case ' ':
  case '\t':
  case 0x0b:
  case 0x0c:
    return true;
  default:
    return false;
  }
}


kdelibs'Lexer::isLineTerminator() (./kdelibs/kjs/lexer.cpp:424)

/** Tells whether the current character is a line feed or a carriage return. */
bool Lexer::isLineTerminator() const
{
  if (current == '\n')
    return true;
  return current == '\r';
}


kdelibs'Lexer::isIdentLetter() (./kdelibs/kjs/lexer.cpp:429)

/**
 * Tells whether the current character may appear in an identifier:
 * an ASCII letter, '$' or '_'. Digits are checked separately by callers.
 */
bool Lexer::isIdentLetter() const
{
  /* TODO: allow other legitimate unicode chars */
  const bool lowerCase = (current >= 'a' && current <= 'z');
  const bool upperCase = (current >= 'A' && current <= 'Z');
  const bool special = (current == '$' || current == '_');
  return lowerCase || upperCase || special;
}


kdelibs'Lexer::isDecimalDigit() (./kdelibs/kjs/lexer.cpp:437)

/** Tells whether @p c is one of the characters '0' to '9'. */
bool Lexer::isDecimalDigit(unsigned short c) const
{
  if (c < '0')
    return false;
  return c <= '9';
}


kdelibs'Lexer::isHexDigit() (./kdelibs/kjs/lexer.cpp:442)

/** Tells whether @p c is a hexadecimal digit: 0-9, a-f or A-F. */
bool Lexer::isHexDigit(unsigned short c) const
{
  if (c >= '0' && c <= '9')
    return true;
  if (c >= 'a' && c <= 'f')
    return true;
  return (c >= 'A' && c <= 'F');
}


kdelibs'Lexer::isOctalDigit() (./kdelibs/kjs/lexer.cpp:449)

/** Tells whether @p c is one of the characters '0' to '7'. */
bool Lexer::isOctalDigit(unsigned short c) const
{
  const bool outside = (c < '0' || c > '7');
  return !outside;
}


kdelibs'Lexer::matchPunctuator() (./kdelibs/kjs/lexer.cpp:454)

/**
 * Recognises the punctuator that starts at the current position.
 *
 * @param c1,c2,c3,c4 the current character and the three following ones
 * @return the token of the longest matching punctuator (for single
 *         characters the character code itself), or -1 if @p c1 does not
 *         start a punctuator. On a match the matched characters are
 *         consumed with shift(); on -1 nothing is consumed.
 *
 * Longer punctuators are tested before their prefixes. If "++" or "--"
 * follows a line terminator, AUTO is returned first and the operator itself
 * is kept in stackToken for the next lex() call (automatic semicolon
 * insertion).
 */
int Lexer::matchPunctuator(unsigned short c1, unsigned short c2,
			      unsigned short c3, unsigned short c4)
{
  // four characters
  if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
    shift(4);
    return URSHIFTEQUAL;
  // three characters
  } else if (c1 == '=' && c2 == '=' && c3 == '=') {
    shift(3);
    return STREQ;
  } else if (c1 == '!' && c2 == '=' && c3 == '=') {
    shift(3);
    return STRNEQ;
  } else if (c1 == '>' && c2 == '>' && c3 == '>') {
    shift(3);
    return URSHIFT;
  } else if (c1 == '<' && c2 == '<' && c3 == '=') {
    shift(3);
    return LSHIFTEQUAL;
  } else if (c1 == '>' && c2 == '>' && c3 == '=') {
    shift(3);
    return RSHIFTEQUAL;
  // two characters
  } else if (c1 == '<' && c2 == '=') {
    shift(2);
    return LE;
  } else if (c1 == '>' && c2 == '=') {
    shift(2);
    return GE;
  } else if (c1 == '!' && c2 == '=') {
    shift(2);
    return NE;
  } else if (c1 == '+' && c2 == '+') {
    shift(2);
    if (terminator) {
      // automatic semicolon insertion
      stackToken = PLUSPLUS;
      return AUTO;
    } else
      return PLUSPLUS;
  } else if (c1 == '-' && c2 == '-') {
    shift(2);
    if (terminator) {
      // automatic semicolon insertion
      stackToken = MINUSMINUS;
      return AUTO;
    } else
      return MINUSMINUS;
  } else if (c1 == '=' && c2 == '=') {
    shift(2);
    return EQEQ;
  } else if (c1 == '+' && c2 == '=') {
    shift(2);
    return PLUSEQUAL;
  } else if (c1 == '-' && c2 == '=') {
    shift(2);
    return MINUSEQUAL;
  } else if (c1 == '*' && c2 == '=') {
    shift(2);
    return MULTEQUAL;
  } else if (c1 == '/' && c2 == '=') {
    shift(2);
    return DIVEQUAL;
  } else if (c1 == '&' && c2 == '=') {
    shift(2);
    return ANDEQUAL;
  } else if (c1 == '^' && c2 == '=') {
    shift(2);
    return XOREQUAL;
  } else if (c1 == '%' && c2 == '=') {
    shift(2);
    return MODEQUAL;
  } else if (c1 == '|' && c2 == '=') {
    // "|=" - this branch used to repeat the "<=" test and never matched
    shift(2);
    return OREQUAL;
  } else if (c1 == '<' && c2 == '<') {
    shift(2);
    return LSHIFT;
  } else if (c1 == '>' && c2 == '>') {
    shift(2);
    return RSHIFT;
  } else if (c1 == '&' && c2 == '&') {
    shift(2);
    return AND;
  } else if (c1 == '|' && c2 == '|') {
    shift(2);
    return OR;
  }

  // single characters are their own token value
  switch(c1) {
    case '=':
    case '>':
    case '<':
    case ',':
    case '!':
    case '~':
    case '?':
    case ':':
    case '.':
    case '+':
    case '-':
    case '*':
    case '/':
    case '&':
    case '|':
    case '^':
    case '%':
    case '(':
    case ')':
    case '{':
    case '}':
    case '[':
    case ']':
    case ';':
      shift(1);
      return static_cast<int>(c1);
    default:
      return -1;
  }
}


kdelibs'Lexer::singleEscape() (./kdelibs/kjs/lexer.cpp:573)

/**
 * Translates the character following a backslash in a string literal.
 *
 * @param c the character after the backslash
 * @return the control character for b, t, n, f and r; any other character
 *         (including the quotes and the backslash) stands for itself,
 *         narrowed to unsigned char by the return type.
 */
unsigned char Lexer::singleEscape(unsigned short c) const
{
  if (c == 'b')
    return 0x08;
  if (c == 't')
    return 0x09;
  if (c == 'n')
    return 0x0A;
  if (c == 'f')
    return 0x0C;
  if (c == 'r')
    return 0x0D;
  // '"' (0x22), '\'' (0x27), '\\' (0x5C) and everything else map to themselves
  return c;
}


kdelibs'Lexer::convertOctal() (./kdelibs/kjs/lexer.cpp:605)

/**
 * Computes the value of the three octal digit characters @p c1 @p c2 @p c3
 * (most significant first). The caller guarantees they are octal digits.
 */
unsigned short Lexer::convertOctal(unsigned short c1, unsigned short c2,
                                      unsigned short c3) const
{
  const int high = (c1 - '0') * 64;
  const int middle = (c2 - '0') * 8;
  return (high + middle + c3 - '0');
}


kdelibs'Lexer::convertHex() (./kdelibs/kjs/lexer.cpp:611)

/**
 * Returns the value (0-15) of the hexadecimal digit character @p c.
 * Anything that is neither a decimal digit nor a lower-case a-f is treated
 * as an upper-case letter, so the caller must pass a valid hex digit.
 */
unsigned char Lexer::convertHex(unsigned short c) const
{
  if (c >= 'a' && c <= 'f')
    return (c - 'a' + 10);
  if (c >= '0' && c <= '9')
    return (c - '0');
  return (c - 'A' + 10);
}


kdelibs'Lexer::convertHex() (./kdelibs/kjs/lexer.cpp:621)

/**
 * Returns the byte value of the two hexadecimal digit characters
 * @p c1 (high nibble) and @p c2 (low nibble), e.g. '4','1' gives 0x41.
 *
 * The shift is parenthesised on purpose: '+' binds tighter than '<<', so
 * the unparenthesised form shifted by (4 + low nibble) and gave wrong
 * values for every escape.
 */
unsigned char Lexer::convertHex(unsigned short c1, unsigned short c2) const
{
  return ((convertHex(c1) << 4) + convertHex(c2));
}


kdelibs'Lexer::convertUnicode() (./kdelibs/kjs/lexer.cpp:626)

/**
 * Builds the character described by the four hexadecimal digit characters
 * of a \uXXXX escape: @p c1 @p c2 form the high byte, @p c3 @p c4 the low
 * byte.
 *
 * Each shift is parenthesised on purpose: '+' binds tighter than '<<', so
 * the unparenthesised form shifted by (4 + low nibble) and produced wrong
 * bytes.
 */
UChar Lexer::convertUnicode(unsigned short c1, unsigned short c2,
                                     unsigned short c3, unsigned short c4) const
{
  return UChar((convertHex(c1) << 4) + convertHex(c2),
               (convertHex(c3) << 4) + convertHex(c4));
}


kdelibs'Lexer::record8() (./kdelibs/kjs/lexer.cpp:633)

/**
 * Appends @p c, which must fit in one byte, to buffer8.
 * The buffer is doubled when only one free slot is left, so that lex() can
 * always store the terminating '\0' after the recorded text.
 */
void Lexer::record8(unsigned short c)
{
  assert(c <= 0xff);

  const bool full = (pos8 >= size8 - 1);
  if (full) {
    // enlarge buffer
    const unsigned int grown = 2 * size8;
    char *bigger = new char[grown];
    memcpy(bigger, buffer8, size8 * sizeof(char));
    delete [] buffer8;
    buffer8 = bigger;
    size8 = grown;
  }

  buffer8[pos8] = (char) c;
  pos8++;
}


kdelibs'Lexer::record16() (./kdelibs/kjs/lexer.cpp:649)

void Lexer::record16(unsigned char c)
{
  record16(UChar(0, c));
}


kdelibs'Lexer::record16() (./kdelibs/kjs/lexer.cpp:654)

/**
 * Appends @p c to buffer16, doubling the buffer first when only one free
 * slot is left.
 */
void Lexer::record16(UChar c)
{
  const bool full = (pos16 >= size16 - 1);
  if (full) {
    // enlarge buffer
    const unsigned int grown = 2 * size16;
    UChar *bigger = new UChar[grown];
    memcpy(bigger, buffer16, size16 * sizeof(UChar));
    delete [] buffer16;
    buffer16 = bigger;
    size16 = grown;
  }

  buffer16[pos16] = c;
  pos16++;
}


kdelibs'Lexer::scanRegExp() (./kdelibs/kjs/lexer.cpp:668)

/**
 * Scans the remainder of a regular expression literal, starting at the
 * character after the opening '/'.
 *
 * On success `pattern` holds the text up to (not including) the closing
 * '/', `flags` holds the identifier letters that directly follow it, and
 * the scan position is behind the flags.
 *
 * A '/' preceded by an unescaped backslash does not end the pattern, so a
 * literal such as /a\/b/ is scanned as a whole; the backslash is kept in
 * `pattern`.
 *
 * @return false if a line terminator or the end of input is reached before
 *         the closing '/', true otherwise.
 *
 * NOTE(review): `current` is an unsigned short, so record16(current) binds
 * to the unsigned char overload and characters above 0xFF lose their high
 * byte.
 */
bool Lexer::scanRegExp()
{
  pos16 = 0;
  bool lastWasEscape = false; // previous character was an unescaped '\'

  while (1) {
    if (isLineTerminator() || current == 0)
      return false;

    if (current == '/' && !lastWasEscape) {
      // closing delimiter
      pattern = UString(buffer16, pos16);
      pos16 = 0;
      shift(1);
      break;
    }

    record16(current);
    // a backslash escapes the next character unless it is escaped itself
    lastWasEscape = !lastWasEscape && current == '\\';
    shift(1);
  }

  while (isIdentLetter()) {
    record16(current);
    shift(1);
  }
  flags = UString(buffer16, pos16);

  return true;
}