Source Code (Use browser search to find items of interest.)
Class Index
kdelibs'Lexer (./kdelibs/kjs/lexer.h:30)
/**
 * Hand-written scanner for script source text.
 *
 * The source is installed with setCode(); each call to lex() then consumes
 * characters and returns one token code. The scanner keeps a four character
 * window (current, next1, next2, next3) over the input that is advanced by
 * shift().
 */
class Lexer {
public:
Lexer();
~Lexer();
// Returns the lexer stored in the object returned by KJScript::current().
static Lexer *curr();
// Stores l as the lexer of the object returned by KJScript::current().
static void setCurrent(Lexer *l);
// Installs the source buffer (len characters starting at c) and resets the
// scan position and line counter. The buffer is referenced, not copied.
void setCode(const UChar *c, unsigned int len);
// Scans and returns the next token code (0 at end of input, -1 on error).
int lex();
// Line number for diagnostics; yylineno starts at 0, hence the + 1.
int lineNo() const { return yylineno + 1; }
// True if the latest lex() call met a line terminator in its Start state.
bool prevTerminator() const { return terminator; }
// Scanner states. The "In..." values are intermediate states of lex();
// the others are the final classification passed to setDone().
enum State { Start,
Identifier,
InIdentifier,
InSingleLineComment,
InMultiLineComment,
InNum,
InNum0,
InHex,
InOctal,
InDecimal,
InExponentIndicator,
InExponent,
Int,
Hex,
Octal,
Decimal,
String,
Eof,
InString,
InEscapeSequence,
InHexEscape,
InUnicodeEscape,
Other,
Bad };
// Scans a regular expression body and its trailing flag letters starting at
// the current character; returns false if the line or the input ends first.
bool scanRegExp();
// Results of the last successful scanRegExp() call.
UString pattern, flags;
private:
// Number of line terminators counted so far (0-based line index).
int yylineno;
// Set by setDone(); ends the scanning loop in lex().
bool done;
// Growable scratch buffers: buffer8 collects numeric literal text,
// buffer16 collects identifier, string and regular expression text.
char *buffer8;
UChar *buffer16;
// Allocated capacities of buffer8 / buffer16.
unsigned int size8, size16;
// Number of characters currently stored in buffer8 / buffer16.
unsigned int pos8, pos16;
// See prevTerminator().
bool terminator;
// True after lex() returned CONTINUE, BREAK, RETURN or THROW; a following
// line terminator is then reported as ';'.
bool restrKeyword;
// Token held back by matchPunctuator() to be returned by the next lex()
// call (0 if none).
int stackToken;
// Current state of the scanner, see State.
State state;
// Records the final state s and marks the current token as complete.
void setDone(State s);
// Index of `current` within `code`.
unsigned int pos;
// Advances the character window by p characters.
void shift(unsigned int p);
int lookupKeyword(const char *);
// Character classification. The parameterless variants test `current`.
bool isWhiteSpace() const;
bool isLineTerminator() const;
bool isIdentLetter() const;
bool isDecimalDigit(unsigned short c) const;
bool isHexDigit(unsigned short c) const;
bool isOctalDigit(unsigned short c) const;
// Matches the longest punctuator starting with c1..c4, consumes it and
// returns its token code, or -1 if there is no match.
int matchPunctuator(unsigned short c1, unsigned short c2,
unsigned short c3, unsigned short c4);
// Helpers translating the characters of an escape sequence into the
// character value they denote.
unsigned char singleEscape(unsigned short c) const;
unsigned short convertOctal(unsigned short c1, unsigned short c2,
unsigned short c3) const;
unsigned char convertHex(unsigned short c1) const;
unsigned char convertHex(unsigned short c1, unsigned short c2) const;
UChar convertUnicode(unsigned short c1, unsigned short c2,
unsigned short c3, unsigned short c4) const;
// Append one character to buffer8 / buffer16, growing the buffer if needed.
void record8(unsigned short c);
void record16(unsigned char c);
void record16(UChar c);
// Source text installed by setCode() and its length in characters.
const UChar *code;
unsigned int length;
int yycolumn;
// current and following unicode characters
// (0 is used for positions past the end of the input)
unsigned short current, next1, next2, next3;
// Pairs a keyword spelling with its token code.
struct keyword {
const char *name;
int token;
};
// for future extensions
class LexerPrivate;
LexerPrivate *priv;
};
}; // namespace
kdelibs'Lexer::Lexer() (./kdelibs/kjs/lexer.cpp:48)
/**
 * Creates a lexer with no source attached and empty scratch buffers.
 *
 * Every data member is given a defined value. In particular restrKeyword
 * must start out false: lex() reads it when it meets a line terminator,
 * which can happen before lex() has ever assigned it.
 *
 * The initialisers are listed in declaration order. The buffers are
 * allocated in the body because buffer8/buffer16 are declared before
 * size8/size16 and would otherwise be sized from uninitialised values.
 */
Lexer::Lexer()
  : yylineno(0),
    done(false),
    buffer8(0), buffer16(0),
    size8(128), size16(128),
    pos8(0), pos16(0),
    terminator(false),
    restrKeyword(false),
    stackToken(0),
    state(Start),
    pos(0),
    code(0), length(0),
    yycolumn(0),
    current(0), next1(0), next2(0), next3(0),
    priv(0)
{
  // allocate space for read buffers
  buffer8 = new char[size8];
  buffer16 = new UChar[size16];
}
kdelibs'Lexer::~Lexer() (./kdelibs/kjs/lexer.cpp:61)
/**
 * Releases the two scratch buffers owned by the lexer. The source text
 * passed to setCode() is not touched.
 */
Lexer::~Lexer()
{
  // wide-character buffer (identifiers, strings, regular expressions)
  delete [] buffer16;
  // narrow buffer (numeric literal text)
  delete [] buffer8;
}
kdelibs'Lexer::curr() (./kdelibs/kjs/lexer.cpp:67)
/**
 * @return the lexer stored in the object returned by KJScript::current().
 */
Lexer *Lexer::curr()
{
  Lexer *active = KJScript::current()->lex;
  return active;
}
kdelibs'Lexer::setCurrent() (./kdelibs/kjs/lexer.cpp:72)
/**
 * Stores l as the lexer of the object returned by KJScript::current(),
 * which is what curr() hands out afterwards.
 */
void Lexer::setCurrent(Lexer *l)
{
KJScript::current()->lex = l;
}
kdelibs'Lexer::setCode() (./kdelibs/kjs/lexer.cpp:77)
void Lexer::setCode(const UChar *c, unsigned int len)
{
yylineno = 0;
stackToken = 0;
pos = 0;
code = c;
length = len;
// read first characters
current = (length > 0) ? code[0].unicode() : 0;
next1 = (length > 1) ? code[1].unicode() : 0;
next2 = (length > 2) ? code[2].unicode() : 0;
next3 = (length > 3) ? code[3].unicode() : 0;
}
kdelibs'Lexer::shift() (./kdelibs/kjs/lexer.cpp:92)
/**
 * Slides the four character window forward over the source text.
 *
 * @param p number of characters to advance by
 *
 * Each step drops `current`, moves the look-ahead characters up by one and
 * fetches a new last look-ahead character, or 0 once the end of the text
 * has been passed.
 */
void Lexer::shift(unsigned int p)
{
  for (unsigned int step = 0; step < p; ++step) {
    ++pos;
    current = next1;
    next1 = next2;
    next2 = next3;

    const unsigned int ahead = pos + 3;
    if (ahead < length)
      next3 = code[ahead].unicode();
    else
      next3 = 0;
  }
}
kdelibs'Lexer::setDone() (./kdelibs/kjs/lexer.cpp:103)
/**
 * Finishes the token that is currently being scanned.
 *
 * @param s final state describing what kind of token was recognised
 */
void Lexer::setDone(State s)
{
  done = true;   // leave the scanning loop in lex()
  state = s;
}
kdelibs'Lexer::lex() (./kdelibs/kjs/lexer.cpp:109)
/**
 * Scans the next token of the source installed with setCode().
 *
 * The function runs a state machine over the character window, one
 * character per iteration, until setDone() records a final state. Text of
 * numeric literals is collected in buffer8, text of identifiers and
 * strings in buffer16.
 *
 * Side effects: `terminator` tells whether a line terminator was met in
 * the Start state during this call, `restrKeyword` tells whether the
 * returned token is CONTINUE, BREAK, RETURN or THROW, and kjsyylval
 * receives the value of IDENT, STRING, INTEGER and DOUBLE tokens.
 *
 * @return 0 at the end of the input, -1 on a lexical error, otherwise the
 *         token code (a punctuator or keyword token, IDENT, STRING,
 *         INTEGER or DOUBLE)
 */
int Lexer::lex()
{
  int token = 0;
  state = Start;
  unsigned short stringType = 0; // either single or double quotes
  pos8 = pos16 = 0;
  done = false;
  terminator = false;

  // did we push a token on the stack previously ?
  // (after an automatic semicolon insertion)
  if (stackToken) {
    setDone(Other);
    token = stackToken;
    stackToken = 0;
  }

  while (!done) {
    switch (state) {
    case Start:
      if (isWhiteSpace()) {
        // do nothing
      } else if (current == '/' && next1 == '/') {
        shift(1);
        state = InSingleLineComment;
        // <!-- marks the beginning of a line comment (for www usage)
      } else if (current == '<' && next1 == '!' && next2 == '-' && next3 == '-') {
        shift(3);
        state = InSingleLineComment;
      } else if (current == '/' && next1 == '*') {
        shift(1);
        state = InMultiLineComment;
      } else if (current == 0) {
        setDone(Eof);
      } else if (isLineTerminator()) {
        yylineno++;
        terminator = true;
        // a line break right after continue/break/return/throw ends the
        // statement
        if (restrKeyword) {
          token = ';';
          setDone(Other);
        }
      } else if (current == '"' || current == '\'') {
        state = InString;
        stringType = current;
      } else if (isIdentLetter()) {
        record16(current);
        state = InIdentifier;
      } else if (current == '0') {
        record8(current);
        state = InNum0;
      } else if (isDecimalDigit(current)) {
        record8(current);
        state = InNum;
      } else if (current == '.' && isDecimalDigit(next1)) {
        record8(current);
        state = InDecimal;
      } else {
        // matchPunctuator() consumes the characters it matched
        token = matchPunctuator(current, next1, next2, next3);
        if (token != -1) {
          setDone(Other);
        } else {
          // cerr << "encountered unknown character" << endl;
          setDone(Bad);
        }
      }
      break;
    case InString:
      if (current == stringType) {
        shift(1);
        setDone(String);
      } else if (current == 0 || isLineTerminator()) {
        setDone(Bad);
      } else if (current == '\\') {
        state = InEscapeSequence;
      } else {
        record16(current);
      }
      break;
    // Escape Sequences inside of strings
    case InEscapeSequence:
      if (isOctalDigit(current)) {
        if (current >= '0' && current <= '3' &&
            isOctalDigit(next1) && isOctalDigit(next2)) {
          record16(convertOctal(current, next1, next2));
          shift(2);
          state = InString;
        } else if (isOctalDigit(current) && isOctalDigit(next1)) {
          record16(convertOctal('0', current, next1));
          shift(1);
          state = InString;
        } else if (isOctalDigit(current)) {
          record16(convertOctal('0', '0', current));
          state = InString;
        } else {
          setDone(Bad);
        }
      } else if (current == 'x')
        state = InHexEscape;
      else if (current == 'u')
        state = InUnicodeEscape;
      else {
        record16(singleEscape(current));
        state = InString;
      }
      break;
    case InHexEscape:
      if (isHexDigit(current) && isHexDigit(next1)) {
        state = InString;
        record16(convertHex(current, next1));
        shift(1);
      } else {
        setDone(Bad);
      }
      break;
    case InUnicodeEscape:
      if (isHexDigit(current) && isHexDigit(next1) &&
          isHexDigit(next2) && isHexDigit(next3)) {
        record16(convertUnicode(current, next1, next2, next3));
        shift(3);
        state = InString;
      } else {
        setDone(Bad);
      }
      break;
    case InSingleLineComment:
      if (isLineTerminator()) {
        // The terminator is not part of the comment. Hand it back to the
        // Start state without consuming it, so that it is counted, noted
        // in `terminator` and checked against restrKeyword exactly like a
        // line break that is not preceded by a comment.
        state = Start;
        continue;
      } else if (current == 0) {
        setDone(Eof);
      }
      break;
    case InMultiLineComment:
      if (current == 0) {
        setDone(Bad);
      } else if (isLineTerminator()) {
        yylineno++;
      } else if (current == '*' && next1 == '/') {
        state = Start;
        shift(1);
      }
      break;
    case InIdentifier:
      if (isIdentLetter() || isDecimalDigit(current)) {
        record16(current);
        break;
      }
      setDone(Identifier);
      break;
    case InNum0:
      if (current == 'x' || current == 'X') {
        record8(current);
        state = InHex;
      } else if (current == '.') {
        record8(current);
        state = InDecimal;
      } else if (current == 'e' || current == 'E') {
        record8(current);
        state = InExponentIndicator;
      } else if (isOctalDigit(current)) {
        record8(current);
        state = InOctal;
      } else {
        setDone(Int);
      }
      break;
    case InHex:
      if (isHexDigit(current)) {
        record8(current);
      } else {
        setDone(Hex);
      }
      break;
    case InOctal:
      if (isOctalDigit(current)) {
        record8(current);
      } else
        setDone(Octal);
      break;
    case InNum:
      if (isDecimalDigit(current)) {
        record8(current);
      } else if (current == '.') {
        record8(current);
        state = InDecimal;
      } else if (current == 'e' || current == 'E') {
        record8(current);
        state = InExponentIndicator;
      } else
        setDone(Int);
      break;
    case InDecimal:
      if (isDecimalDigit(current)) {
        record8(current);
      } else if (current == 'e' || current == 'E') {
        record8(current);
        state = InExponentIndicator;
      } else
        setDone(Decimal);
      break;
    case InExponentIndicator:
      if (current == '+' || current == '-') {
        record8(current);
      } else if (isDecimalDigit(current)) {
        record8(current);
        state = InExponent;
      } else
        setDone(Bad);
      break;
    case InExponent:
      if (isDecimalDigit(current)) {
        record8(current);
      } else
        setDone(Decimal);
      break;
    default:
      assert(!"Unhandled state in switch statement");
    }

    // move on to the next character
    if (!done) {
      shift(1);
    }
  }

  // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
  if ((state == Int || state == Decimal || state == Octal || state == Hex)
      && isIdentLetter())
    state = Bad;

  // terminate string
  buffer8[pos8] = '\0';

#ifdef KJS_DEBUG_LEX
  fprintf(stderr, "line: %d ", lineNo());
  fprintf(stderr, "yytext (%x): ", buffer8[0]);
  fprintf(stderr, "%s ", buffer8);
#endif

  // Value of an integer literal. Starts at 0 so that a defined value is
  // passed on even if sscanf() below cannot convert the text (for example
  // a "0x" without any hex digit).
  int i = 0;
  // scan integer and hex numbers
  if (state == Int || state == Hex) {
    sscanf(buffer8, "%i", &i);
    state = Int;
  }
  // scan octal number
  if (state == Octal) {
    unsigned int ui = 0;
    sscanf(buffer8, "%o", &ui);
    i = ui;
    state = Int;
  }

#ifdef KJS_DEBUG_LEX
  switch (state) {
  case Eof:
    printf("(EOF)\n");
    break;
  case Other:
    printf("(Other)\n");
    break;
  case Identifier:
    printf("(Identifier)/(Keyword)\n");
    break;
  case String:
    printf("(String)\n");
    break;
  case Int:
    printf("(Int)\n");
    break;
  case Decimal:
    printf("(Decimal)\n");
    break;
  default:
    printf("(unknown)");
  }
#endif

  restrKeyword = false;

  switch (state) {
  case Eof:
    return 0;
  case Other:
    return token;
  case Identifier:
    // not found in the keyword table: an ordinary identifier
    if ((token = Lookup::find(&mainTable, buffer16, pos16)) < 0) {
      kjsyylval.ustr = new UString(buffer16, pos16);
      return IDENT;
    }
    // remember keywords after which a line break ends the statement
    if (token == CONTINUE || token == BREAK ||
        token == RETURN || token == THROW)
      restrKeyword = true;
    return token;
  case String:
    kjsyylval.ustr = new UString(buffer16, pos16); return STRING;
  case Int:
    kjsyylval.ival = i; return INTEGER;
  case Decimal:
    kjsyylval.dval = strtod(buffer8, 0L); return DOUBLE;
  case Bad:
    fprintf(stderr, "yylex: ERROR.\n");
    return -1;
  default:
    assert(!"unhandled numeration value in switch");
    return -1;
  }
}
kdelibs'Lexer::isWhiteSpace() (./kdelibs/kjs/lexer.cpp:418)
/**
 * @return true if the current character is a space, a horizontal tab,
 *         a vertical tab (0x0b) or a form feed (0x0c)
 */
bool Lexer::isWhiteSpace() const
{
  switch (current) {
  case ' ':
  case '\t':
  case 0x0b:
  case 0x0c:
    return true;
  default:
    return false;
  }
}
kdelibs'Lexer::isLineTerminator() (./kdelibs/kjs/lexer.cpp:424)
/**
 * @return true if the current character is a line feed or a carriage
 *         return
 */
bool Lexer::isLineTerminator() const
{
  if (current == '\n')
    return true;
  return current == '\r';
}
kdelibs'Lexer::isIdentLetter() (./kdelibs/kjs/lexer.cpp:429)
/**
 * @return true if the current character may appear in an identifier:
 *         an ASCII letter, '$' or '_' (digits are checked separately)
 */
bool Lexer::isIdentLetter() const
{
  /* TODO: allow other legitimate unicode chars */
  const unsigned short ch = current;

  if (ch == '$' || ch == '_')
    return true;
  if (ch >= 'a' && ch <= 'z')
    return true;
  return (ch >= 'A' && ch <= 'Z');
}
kdelibs'Lexer::isDecimalDigit() (./kdelibs/kjs/lexer.cpp:437)
/**
 * @param c character code to classify
 * @return true if c is one of '0' .. '9'
 */
bool Lexer::isDecimalDigit(unsigned short c) const
{
  const bool outside = (c < '0' || c > '9');
  return !outside;
}
kdelibs'Lexer::isHexDigit() (./kdelibs/kjs/lexer.cpp:442)
/**
 * @param c character code to classify
 * @return true if c is a hexadecimal digit in either letter case
 */
bool Lexer::isHexDigit(unsigned short c) const
{
  if (c >= '0' && c <= '9')
    return true;
  if (c >= 'a' && c <= 'f')
    return true;
  return (c >= 'A' && c <= 'F');
}
kdelibs'Lexer::isOctalDigit() (./kdelibs/kjs/lexer.cpp:449)
/**
 * @param c character code to classify
 * @return true if c is one of '0' .. '7'
 */
bool Lexer::isOctalDigit(unsigned short c) const
{
  const bool outside = (c < '0' || c > '7');
  return !outside;
}
kdelibs'Lexer::matchPunctuator() (./kdelibs/kjs/lexer.cpp:454)
/**
 * Recognises the punctuator that starts at the current character.
 *
 * @param c1 current character
 * @param c2 first look-ahead character
 * @param c3 second look-ahead character
 * @param c4 third look-ahead character
 * @return the token code of the punctuator (the character code itself for
 *         single-character punctuators), or -1 if c1 starts none
 *
 * Candidates are tried from the longest (four characters) to the shortest
 * so that e.g. ">>>=" is not split up. The matched characters are consumed
 * with shift(); nothing is consumed when -1 is returned.
 *
 * "++" and "--" that follow a line terminator are reported as AUTO
 * (automatic semicolon insertion); the operator itself is kept in
 * stackToken and returned by the next lex() call.
 */
int Lexer::matchPunctuator(unsigned short c1, unsigned short c2,
                           unsigned short c3, unsigned short c4)
{
  if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
    shift(4);
    return URSHIFTEQUAL;
  } else if (c1 == '=' && c2 == '=' && c3 == '=') {
    shift(3);
    return STREQ;
  } else if (c1 == '!' && c2 == '=' && c3 == '=') {
    shift(3);
    return STRNEQ;
  } else if (c1 == '>' && c2 == '>' && c3 == '>') {
    shift(3);
    return URSHIFT;
  } else if (c1 == '<' && c2 == '<' && c3 == '=') {
    shift(3);
    return LSHIFTEQUAL;
  } else if (c1 == '>' && c2 == '>' && c3 == '=') {
    shift(3);
    return RSHIFTEQUAL;
  } else if (c1 == '<' && c2 == '=') {
    shift(2);
    return LE;
  } else if (c1 == '>' && c2 == '=') {
    shift(2);
    return GE;
  } else if (c1 == '!' && c2 == '=') {
    shift(2);
    return NE;
  } else if (c1 == '+' && c2 == '+') {
    shift(2);
    if (terminator) {
      // automatic semicolon insertion
      stackToken = PLUSPLUS;
      return AUTO;
    } else
      return PLUSPLUS;
  } else if (c1 == '-' && c2 == '-') {
    shift(2);
    if (terminator) {
      // automatic semicolon insertion
      stackToken = MINUSMINUS;
      return AUTO;
    } else
      return MINUSMINUS;
  } else if (c1 == '=' && c2 == '=') {
    shift(2);
    return EQEQ;
  } else if (c1 == '+' && c2 == '=') {
    shift(2);
    return PLUSEQUAL;
  } else if (c1 == '-' && c2 == '=') {
    shift(2);
    return MINUSEQUAL;
  } else if (c1 == '*' && c2 == '=') {
    shift(2);
    return MULTEQUAL;
  } else if (c1 == '/' && c2 == '=') {
    shift(2);
    return DIVEQUAL;
  } else if (c1 == '&' && c2 == '=') {
    shift(2);
    return ANDEQUAL;
  } else if (c1 == '^' && c2 == '=') {
    shift(2);
    return XOREQUAL;
  } else if (c1 == '%' && c2 == '=') {
    shift(2);
    return MODEQUAL;
  } else if (c1 == '|' && c2 == '=') {
    // "|=": this branch used to test for '<', which the LE branch above
    // had already taken, so OREQUAL could never be produced
    shift(2);
    return OREQUAL;
  } else if (c1 == '<' && c2 == '<') {
    shift(2);
    return LSHIFT;
  } else if (c1 == '>' && c2 == '>') {
    shift(2);
    return RSHIFT;
  } else if (c1 == '&' && c2 == '&') {
    shift(2);
    return AND;
  } else if (c1 == '|' && c2 == '|') {
    shift(2);
    return OR;
  }

  // single-character punctuators are their own token code
  switch(c1) {
  case '=':
  case '>':
  case '<':
  case ',':
  case '!':
  case '~':
  case '?':
  case ':':
  case '.':
  case '+':
  case '-':
  case '*':
  case '/':
  case '&':
  case '|':
  case '^':
  case '%':
  case '(':
  case ')':
  case '{':
  case '}':
  case '[':
  case ']':
  case ';':
    shift(1);
    return static_cast<int>(c1);
  default:
    return -1;
  }
}
kdelibs'Lexer::singleEscape() (./kdelibs/kjs/lexer.cpp:573)
/**
 * Translates the character following a backslash in a string literal.
 *
 * @param c character that follows the backslash
 * @return the control character denoted by b, t, n, f and r; any other
 *         character stands for itself, narrowed to unsigned char by the
 *         return type
 */
unsigned char Lexer::singleEscape(unsigned short c) const
{
  switch(c) {
  case 'b':  return '\b';   // backspace, 0x08
  case 't':  return '\t';   // horizontal tab, 0x09
  case 'n':  return '\n';   // line feed, 0x0A
  case 'f':  return '\f';   // form feed, 0x0C
  case 'r':  return '\r';   // carriage return, 0x0D
  case '"':  return '"';    // 0x22
  case '\'': return '\'';   // 0x27
  case '\\': return '\\';   // 0x5C
  default:
    return c;
  }
}
kdelibs'Lexer::convertOctal() (./kdelibs/kjs/lexer.cpp:605)
/**
 * Computes the value of a three digit octal escape sequence.
 *
 * @param c1 most significant octal digit character
 * @param c2 middle octal digit character
 * @param c3 least significant octal digit character
 * @return the value c1 * 64 + c2 * 8 + c3 of the digits
 */
unsigned short Lexer::convertOctal(unsigned short c1, unsigned short c2,
                                   unsigned short c3) const
{
  // Horner form of d1 * 64 + d2 * 8 + d3
  return (((c1 - '0') * 8 + (c2 - '0')) * 8 + (c3 - '0'));
}
kdelibs'Lexer::convertHex() (./kdelibs/kjs/lexer.cpp:611)
/**
 * Computes the value of a single hexadecimal digit.
 *
 * @param c digit character; the caller is expected to have checked it
 *          with isHexDigit(), anything that is neither '0'-'9' nor
 *          'a'-'f' is treated like an upper case digit
 * @return the value of the digit
 */
unsigned char Lexer::convertHex(unsigned short c) const
{
  if (c >= 'a' && c <= 'f')
    return (c - 'a' + 10);
  if (c >= '0' && c <= '9')
    return (c - '0');
  return (c - 'A' + 10);
}
kdelibs'Lexer::convertHex() (./kdelibs/kjs/lexer.cpp:621)
/**
 * Computes the value of a two digit hexadecimal escape sequence.
 *
 * @param c1 high digit character
 * @param c2 low digit character
 * @return high digit * 16 + low digit
 *
 * The shift needs its own parentheses: '+' binds tighter than '<<', so
 * without them the high digit would be shifted by (4 + low digit) bits.
 */
unsigned char Lexer::convertHex(unsigned short c1, unsigned short c2) const
{
  return ((convertHex(c1) << 4) + convertHex(c2));
}
kdelibs'Lexer::convertUnicode() (./kdelibs/kjs/lexer.cpp:626)
/**
 * Builds the character denoted by a four digit unicode escape sequence.
 *
 * @param c1 first (most significant) hex digit character
 * @param c2 second hex digit character
 * @param c3 third hex digit character
 * @param c4 fourth (least significant) hex digit character
 * @return the character whose first constructor byte is made of c1 c2 and
 *         whose second byte is made of c3 c4
 *
 * Each shift needs its own parentheses: '+' binds tighter than '<<', so
 * without them a digit would be shifted by (4 + next digit) bits.
 */
UChar Lexer::convertUnicode(unsigned short c1, unsigned short c2,
                            unsigned short c3, unsigned short c4) const
{
  return UChar((convertHex(c1) << 4) + convertHex(c2),
               (convertHex(c3) << 4) + convertHex(c4));
}
kdelibs'Lexer::record8() (./kdelibs/kjs/lexer.cpp:633)
/**
 * Appends one character to the narrow scratch buffer.
 *
 * @param c character to store; must fit into eight bits
 *
 * The capacity is doubled when only one free slot is left, which keeps
 * that slot available for the terminating '\0' written by lex().
 */
void Lexer::record8(unsigned short c)
{
  assert(c <= 0xff);

  const bool full = (pos8 >= size8 - 1);
  if (full) {
    // move the contents into a buffer of twice the size
    const unsigned int grown = 2 * size8;
    char *bigger = new char[grown];
    memcpy(bigger, buffer8, size8 * sizeof(char));
    delete [] buffer8;
    buffer8 = bigger;
    size8 = grown;
  }

  buffer8[pos8] = (char) c;
  ++pos8;
}
kdelibs'Lexer::record16() (./kdelibs/kjs/lexer.cpp:649)
void Lexer::record16(unsigned char c)
{
record16(UChar(0, c));
}
kdelibs'Lexer::record16() (./kdelibs/kjs/lexer.cpp:654)
/**
 * Appends one character to the wide scratch buffer.
 *
 * @param c character to store
 *
 * The capacity is doubled when only one free slot is left.
 */
void Lexer::record16(UChar c)
{
  const bool full = (pos16 >= size16 - 1);
  if (full) {
    // move the contents into a buffer of twice the size
    const unsigned int grown = 2 * size16;
    UChar *bigger = new UChar[grown];
    memcpy(bigger, buffer16, size16 * sizeof(UChar));
    delete [] buffer16;
    buffer16 = bigger;
    size16 = grown;
  }

  buffer16[pos16] = c;
  ++pos16;
}
kdelibs'Lexer::scanRegExp() (./kdelibs/kjs/lexer.cpp:668)
/**
 * Scans a regular expression literal whose opening '/' has already been
 * consumed, starting at the current character.
 *
 * On success the text up to the closing '/' is stored in `pattern` and the
 * identifier letters directly after it in `flags`; the scan position is
 * left behind the flags.
 *
 * A '/' preceded by an unescaped backslash belongs to the pattern and does
 * not end it, so that a literal such as /a\/b/ is scanned as a whole. The
 * backslash is kept in the pattern text.
 *
 * @return false if a line terminator or the end of the input is reached
 *         before the closing '/', true otherwise
 */
bool Lexer::scanRegExp()
{
  pos16 = 0;
  // true while the previous character was a backslash that itself was not
  // escaped
  bool escaped = false;

  while (1) {
    if (isLineTerminator() || current == 0)
      return false;
    else if (current != '/' || escaped) {
      record16(current);
      escaped = !escaped && (current == '\\');
    } else {
      // unescaped '/': end of the pattern
      pattern = UString(buffer16, pos16);
      pos16 = 0;
      shift(1);
      break;
    }
    shift(1);
  }

  // flags directly following the closing '/'
  while (isIdentLetter()) {
    record16(current);
    shift(1);
  }
  flags = UString(buffer16, pos16);

  return true;
}