1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 : * vim: set ts=8 sw=4 et tw=99:
3 : *
4 : * ***** BEGIN LICENSE BLOCK *****
5 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 : *
7 : * The contents of this file are subject to the Mozilla Public License Version
8 : * 1.1 (the "License"); you may not use this file except in compliance with
9 : * the License. You may obtain a copy of the License at
10 : * http://www.mozilla.org/MPL/
11 : *
12 : * Software distributed under the License is distributed on an "AS IS" basis,
13 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 : * for the specific language governing rights and limitations under the
15 : * License.
16 : *
17 : * The Original Code is Mozilla Communicator client code, released
18 : * March 31, 1998.
19 : *
20 : * The Initial Developer of the Original Code is
21 : * Netscape Communications Corporation.
22 : * Portions created by the Initial Developer are Copyright (C) 1998
23 : * the Initial Developer. All Rights Reserved.
24 : *
25 : * Contributor(s):
26 : * Nick Fitzgerald <nfitzgerald@mozilla.com>
27 : *
28 : * Alternatively, the contents of this file may be used under the terms of
29 : * either of the GNU General Public License Version 2 or later (the "GPL"),
30 : * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
31 : * in which case the provisions of the GPL or the LGPL are applicable instead
32 : * of those above. If you wish to allow use of your version of this file only
33 : * under the terms of either the GPL or the LGPL, and not to allow others to
34 : * use your version of this file under the terms of the MPL, indicate your
35 : * decision by deleting the provisions above and replace them with the notice
36 : * and other provisions required by the GPL or the LGPL. If you do not delete
37 : * the provisions above, a recipient may use your version of this file under
38 : * the terms of any one of the MPL, the GPL or the LGPL.
39 : *
40 : * ***** END LICENSE BLOCK ***** */
41 :
42 : /*
43 : * JS lexical scanner.
44 : */
45 : #include <stdio.h> /* first to avoid trouble on some systems */
46 : #include <errno.h>
47 : #include <limits.h>
48 : #include <math.h>
49 : #ifdef HAVE_MEMORY_H
50 : #include <memory.h>
51 : #endif
52 : #include <stdarg.h>
53 : #include <stdlib.h>
54 : #include <string.h>
55 : #include "jstypes.h"
56 : #include "jsutil.h"
57 : #include "jsprf.h"
58 : #include "jsapi.h"
59 : #include "jsatom.h"
60 : #include "jscntxt.h"
61 : #include "jsversion.h"
62 : #include "jsexn.h"
63 : #include "jsnum.h"
64 : #include "jsopcode.h"
65 : #include "jsscript.h"
66 :
67 : #include "frontend/BytecodeEmitter.h"
68 : #include "frontend/Parser.h"
69 : #include "frontend/TokenStream.h"
70 : #include "vm/RegExpObject.h"
71 :
72 : #include "jsscriptinlines.h"
73 :
74 : #if JS_HAS_XML_SUPPORT
75 : #include "jsxml.h"
76 : #endif
77 :
78 : using namespace js;
79 : using namespace js::unicode;
80 :
81 : #define JS_KEYWORD(keyword, type, op, version) \
82 : const char js_##keyword##_str[] = #keyword;
83 : #include "jskeyword.tbl"
84 : #undef JS_KEYWORD
85 :
86 : static const KeywordInfo keywords[] = {
87 : #define JS_KEYWORD(keyword, type, op, version) \
88 : {js_##keyword##_str, type, op, version},
89 : #include "jskeyword.tbl"
90 : #undef JS_KEYWORD
91 : };
92 :
93 : const KeywordInfo *
94 26501645 : js::FindKeyword(const jschar *s, size_t length)
95 : {
96 26501645 : JS_ASSERT(length != 0);
97 :
98 : register size_t i;
99 : const struct KeywordInfo *kw;
100 : const char *chars;
101 :
102 : #define JSKW_LENGTH() length
103 : #define JSKW_AT(column) s[column]
104 : #define JSKW_GOT_MATCH(index) i = (index); goto got_match;
105 : #define JSKW_TEST_GUESS(index) i = (index); goto test_guess;
106 : #define JSKW_NO_MATCH() goto no_match;
107 : #include "jsautokw.h"
108 : #undef JSKW_NO_MATCH
109 : #undef JSKW_TEST_GUESS
110 : #undef JSKW_GOT_MATCH
111 : #undef JSKW_AT
112 : #undef JSKW_LENGTH
113 :
114 : got_match:
115 6034273 : return &keywords[i];
116 :
117 : test_guess:
118 2068731 : kw = &keywords[i];
119 2068731 : chars = kw->chars;
120 12846178 : do {
121 13202473 : if (*s++ != (unsigned char)(*chars++))
122 356295 : goto no_match;
123 : } while (--length != 0);
124 1712436 : return kw;
125 :
126 : no_match:
127 18754936 : return NULL;
128 : }
129 :
130 : JSBool
131 390741 : js::IsIdentifier(JSLinearString *str)
132 : {
133 390741 : const jschar *chars = str->chars();
134 390741 : size_t length = str->length();
135 :
136 390741 : if (length == 0)
137 27 : return JS_FALSE;
138 390714 : jschar c = *chars;
139 390714 : if (!IsIdentifierStart(c))
140 80 : return JS_FALSE;
141 390634 : const jschar *end = chars + length;
142 2229879 : while (++chars != end) {
143 1448640 : c = *chars;
144 1448640 : if (!IsIdentifierPart(c))
145 29 : return JS_FALSE;
146 : }
147 390605 : return JS_TRUE;
148 : }
149 :
150 : #ifdef _MSC_VER
151 : #pragma warning(push)
152 : #pragma warning(disable:4351)
153 : #endif
154 :
155 : /* Initialize members that aren't initialized in |init|. */
156 138617 : TokenStream::TokenStream(JSContext *cx, JSPrincipals *prin, JSPrincipals *originPrin)
157 : : tokens(), cursor(), lookahead(), flags(), listenerTSData(), tokenbuf(cx),
158 138617 : cx(cx), originPrincipals(JSScript::normalizeOriginPrincipals(prin, originPrin))
159 : {
160 138617 : if (originPrincipals)
161 26964 : JS_HoldPrincipals(originPrincipals);
162 138617 : }
163 :
164 : #ifdef _MSC_VER
165 : #pragma warning(pop)
166 : #endif
167 :
168 : bool
169 138617 : TokenStream::init(const jschar *base, size_t length, const char *fn, unsigned ln, JSVersion v)
170 : {
171 138617 : filename = fn;
172 138617 : lineno = ln;
173 138617 : version = v;
174 138617 : xml = VersionHasXML(v);
175 :
176 138617 : userbuf.init(base, length);
177 138617 : linebase = base;
178 138617 : prevLinebase = NULL;
179 138617 : sourceMap = NULL;
180 :
181 138617 : JSSourceHandler listener = cx->runtime->debugHooks.sourceHandler;
182 138617 : void *listenerData = cx->runtime->debugHooks.sourceHandlerData;
183 :
184 138617 : if (listener)
185 0 : listener(fn, ln, base, length, &listenerTSData, listenerData);
186 :
187 : /*
188 : * This table holds all the token kinds that satisfy these properties:
189 : * - A single char long.
190 : * - Cannot be a prefix of any longer token (eg. '+' is excluded because
191 : * '+=' is a valid token).
192 : * - Doesn't need tp->t_op set (eg. this excludes '~').
193 : *
194 : * The few token kinds satisfying these properties cover roughly 35--45%
195 : * of the tokens seen in practice.
196 : *
197 : * Nb: oneCharTokens, maybeEOL and maybeStrSpecial could be static, but
198 : * initializing them this way is a bit easier. Don't worry, the time to
199 : * initialize them for each TokenStream is trivial. See bug 639420.
200 : */
201 138617 : memset(oneCharTokens, 0, sizeof(oneCharTokens));
202 138617 : oneCharTokens[unsigned(';')] = TOK_SEMI;
203 138617 : oneCharTokens[unsigned(',')] = TOK_COMMA;
204 138617 : oneCharTokens[unsigned('?')] = TOK_HOOK;
205 138617 : oneCharTokens[unsigned('[')] = TOK_LB;
206 138617 : oneCharTokens[unsigned(']')] = TOK_RB;
207 138617 : oneCharTokens[unsigned('{')] = TOK_LC;
208 138617 : oneCharTokens[unsigned('}')] = TOK_RC;
209 138617 : oneCharTokens[unsigned('(')] = TOK_LP;
210 138617 : oneCharTokens[unsigned(')')] = TOK_RP;
211 :
212 : /* See getChar() for an explanation of maybeEOL[]. */
213 138617 : memset(maybeEOL, 0, sizeof(maybeEOL));
214 138617 : maybeEOL[unsigned('\n')] = true;
215 138617 : maybeEOL[unsigned('\r')] = true;
216 138617 : maybeEOL[unsigned(LINE_SEPARATOR & 0xff)] = true;
217 138617 : maybeEOL[unsigned(PARA_SEPARATOR & 0xff)] = true;
218 :
219 : /* See getTokenInternal() for an explanation of maybeStrSpecial[]. */
220 138617 : memset(maybeStrSpecial, 0, sizeof(maybeStrSpecial));
221 138617 : maybeStrSpecial[unsigned('"')] = true;
222 138617 : maybeStrSpecial[unsigned('\'')] = true;
223 138617 : maybeStrSpecial[unsigned('\\')] = true;
224 138617 : maybeStrSpecial[unsigned('\n')] = true;
225 138617 : maybeStrSpecial[unsigned('\r')] = true;
226 138617 : maybeStrSpecial[unsigned(LINE_SEPARATOR & 0xff)] = true;
227 138617 : maybeStrSpecial[unsigned(PARA_SEPARATOR & 0xff)] = true;
228 138617 : maybeStrSpecial[unsigned(EOF & 0xff)] = true;
229 :
230 : /*
231 : * Set |ln| as the beginning line number of the ungot "current token", so
232 : * that js::Parser::statements (and potentially other such methods, in the
233 : * future) can create parse nodes with good source coordinates before they
234 : * explicitly get any tokens.
235 : *
236 : * Switching the parser/lexer so we always get the next token ahead of the
237 : * parser needing it (the so-called "pump-priming" model) might be a better
238 : * way to address the dependency from statements on the current token.
239 : */
240 138617 : tokens[0].pos.begin.lineno = tokens[0].pos.end.lineno = ln;
241 138617 : return true;
242 : }
243 :
244 277234 : TokenStream::~TokenStream()
245 : {
246 138617 : if (flags & TSF_OWNFILENAME)
247 1 : cx->free_((void *) filename);
248 138617 : if (sourceMap)
249 0 : cx->free_(sourceMap);
250 138617 : if (originPrincipals)
251 26964 : JS_DropPrincipals(cx->runtime, originPrincipals);
252 138617 : }
253 :
/*
 * Pick the fastest getc variant the configuration advertises: getc_unlocked
 * if available, else _getc_nolock, else plain getc.
 */
#if !defined(HAVE_GETC_UNLOCKED) && !defined(HAVE__GETC_NOLOCK)
# define fast_getc getc
#elif defined(HAVE_GETC_UNLOCKED)
# define fast_getc getc_unlocked
#else
# define fast_getc _getc_nolock
#endif
262 :
263 : JS_ALWAYS_INLINE void
264 19616771 : TokenStream::updateLineInfoForEOL()
265 : {
266 19616771 : prevLinebase = linebase;
267 19616771 : linebase = userbuf.addressOfNextRawChar();
268 19616771 : lineno++;
269 19616771 : }
270 :
271 : JS_ALWAYS_INLINE void
272 16263262 : TokenStream::updateFlagsForEOL()
273 : {
274 16263262 : flags &= ~TSF_DIRTYLINE;
275 16263262 : flags |= TSF_EOL;
276 16263262 : }
277 :
278 : /* This gets the next char, normalizing all EOL sequences to '\n' as it goes. */
279 : int32_t
280 162909483 : TokenStream::getChar()
281 : {
282 : int32_t c;
283 162909483 : if (JS_LIKELY(userbuf.hasRawChars())) {
284 162909411 : c = userbuf.getRawChar();
285 :
286 : /*
287 : * Normalize the jschar if it was a newline. We need to detect any of
288 : * these four characters: '\n' (0x000a), '\r' (0x000d),
289 : * LINE_SEPARATOR (0x2028), PARA_SEPARATOR (0x2029). Testing for each
290 : * one in turn is slow, so we use a single probabilistic check, and if
291 : * that succeeds, test for them individually.
292 : *
293 : * We use the bottom 8 bits to index into a lookup table, succeeding
294 : * when d&0xff is 0xa, 0xd, 0x28 or 0x29. Among ASCII chars (which
295 : * are by the far the most common) this gives false positives for '('
296 : * (0x0028) and ')' (0x0029). We could avoid those by incorporating
297 : * the 13th bit of d into the lookup, but that requires extra shifting
298 : * and masking and isn't worthwhile. See TokenStream::init() for the
299 : * initialization of the relevant entries in the table.
300 : */
301 162909411 : if (JS_UNLIKELY(maybeEOL[c & 0xff])) {
302 4013242 : if (c == '\n')
303 3600876 : goto eol;
304 412366 : if (c == '\r') {
305 : /* if it's a \r\n sequence: treat as a single EOL, skip over the \n */
306 17205 : if (userbuf.hasRawChars())
307 17205 : userbuf.matchRawChar('\n');
308 17205 : goto eol;
309 : }
310 395161 : if (c == LINE_SEPARATOR || c == PARA_SEPARATOR)
311 : goto eol;
312 : }
313 159291330 : return c;
314 : }
315 :
316 72 : flags |= TSF_EOF;
317 72 : return EOF;
318 :
319 : eol:
320 3618081 : updateLineInfoForEOL();
321 3618081 : return '\n';
322 : }
323 :
324 : /*
325 : * This gets the next char. It does nothing special with EOL sequences, not
326 : * even updating the line counters. It can be used safely if (a) the
327 : * resulting char is guaranteed to be ungotten (by ungetCharIgnoreEOL()) if
328 : * it's an EOL, and (b) the line-related state (lineno, linebase) is not used
329 : * before it's ungotten.
330 : */
331 : int32_t
332 876145174 : TokenStream::getCharIgnoreEOL()
333 : {
334 876145174 : if (JS_LIKELY(userbuf.hasRawChars()))
335 876131907 : return userbuf.getRawChar();
336 :
337 13267 : flags |= TSF_EOF;
338 13267 : return EOF;
339 : }
340 :
341 : void
342 16332096 : TokenStream::ungetChar(int32_t c)
343 : {
344 16332096 : if (c == EOF)
345 71 : return;
346 16332025 : JS_ASSERT(!userbuf.atStart());
347 16332025 : userbuf.ungetRawChar();
348 16332025 : if (c == '\n') {
349 : #ifdef DEBUG
350 1547617 : int32_t c2 = userbuf.peekRawChar();
351 1547617 : JS_ASSERT(TokenBuf::isRawEOLChar(c2));
352 : #endif
353 :
354 : /* if it's a \r\n sequence, also unget the \r */
355 1547617 : if (!userbuf.atStart())
356 1547617 : userbuf.matchRawCharBackwards('\r');
357 :
358 1547617 : JS_ASSERT(prevLinebase); /* we should never get more than one EOL char */
359 1547617 : linebase = prevLinebase;
360 1547617 : prevLinebase = NULL;
361 1547617 : lineno--;
362 : } else {
363 14784408 : JS_ASSERT(userbuf.peekRawChar() == c);
364 : }
365 : }
366 :
367 : void
368 56504616 : TokenStream::ungetCharIgnoreEOL(int32_t c)
369 : {
370 56504616 : if (c == EOF)
371 13249 : return;
372 56491367 : JS_ASSERT(!userbuf.atStart());
373 56491367 : userbuf.ungetRawChar();
374 : }
375 :
376 : /*
377 : * Return true iff |n| raw characters can be read from this without reading past
378 : * EOF or a newline, and copy those characters into |cp| if so. The characters
379 : * are not consumed: use skipChars(n) to do so after checking that the consumed
380 : * characters had appropriate values.
381 : */
382 : bool
383 737221 : TokenStream::peekChars(int n, jschar *cp)
384 : {
385 : int i, j;
386 : int32_t c;
387 :
388 12244803 : for (i = 0; i < n; i++) {
389 11557732 : c = getCharIgnoreEOL();
390 11557732 : if (c == EOF)
391 18 : break;
392 11557714 : if (c == '\n') {
393 50132 : ungetCharIgnoreEOL(c);
394 50132 : break;
395 : }
396 11507582 : cp[i] = (jschar)c;
397 : }
398 12244803 : for (j = i - 1; j >= 0; j--)
399 11507582 : ungetCharIgnoreEOL(cp[j]);
400 737221 : return i == n;
401 : }
402 :
403 : const jschar *
404 5442 : TokenStream::TokenBuf::findEOL()
405 : {
406 5442 : const jschar *tmp = ptr;
407 : #ifdef DEBUG
408 : /*
409 : * This is the one exception to the "TokenBuf isn't accessed after
410 : * poisoning" rule -- we may end up calling findEOL() in order to set up
411 : * an error.
412 : */
413 5442 : if (!tmp)
414 0 : tmp = ptrWhenPoisoned;
415 : #endif
416 :
417 19726 : while (true) {
418 25168 : if (tmp >= limit)
419 581 : break;
420 24587 : if (TokenBuf::isRawEOLChar(*tmp++))
421 4861 : break;
422 : }
423 5442 : return tmp;
424 : }
425 :
426 : bool
427 15762 : TokenStream::reportCompileErrorNumberVA(ParseNode *pn, unsigned flags, unsigned errorNumber, va_list ap)
428 : {
429 : JSErrorReport report;
430 : char *message;
431 : jschar *linechars;
432 : char *linebytes;
433 : bool warning;
434 : JSBool ok;
435 : const TokenPos *tp;
436 : unsigned i;
437 :
438 15762 : if (JSREPORT_IS_STRICT(flags) && !cx->hasStrictOption())
439 9858 : return true;
440 :
441 5904 : warning = JSREPORT_IS_WARNING(flags);
442 5904 : if (warning && cx->hasWErrorOption()) {
443 0 : flags &= ~JSREPORT_WARNING;
444 0 : warning = false;
445 : }
446 :
447 5904 : PodZero(&report);
448 5904 : report.flags = flags;
449 5904 : report.errorNumber = errorNumber;
450 5904 : message = NULL;
451 5904 : linechars = NULL;
452 5904 : linebytes = NULL;
453 :
454 : MUST_FLOW_THROUGH("out");
455 : ok = js_ExpandErrorArguments(cx, js_GetErrorMessage, NULL,
456 : errorNumber, &message, &report,
457 5904 : !(flags & JSREPORT_UC), ap);
458 5904 : if (!ok) {
459 0 : warning = false;
460 0 : goto out;
461 : }
462 :
463 5904 : report.filename = filename;
464 5904 : report.originPrincipals = originPrincipals;
465 :
466 5904 : tp = pn ? &pn->pn_pos : ¤tToken().pos;
467 5904 : report.lineno = tp->begin.lineno;
468 :
469 : /*
470 : * Given a token, T, that we want to complain about: if T's (starting)
471 : * lineno doesn't match TokenStream's lineno, that means we've scanned past
472 : * the line that T starts on, which makes it hard to print some or all of
473 : * T's (starting) line for context.
474 : *
475 : * So we don't even try, leaving report.linebuf and friends zeroed. This
476 : * means that any error involving a multi-line token (eg. an unterminated
477 : * multi-line string literal) won't have a context printed.
478 : */
479 5904 : if (report.lineno == lineno) {
480 5442 : size_t linelength = userbuf.findEOL() - linebase;
481 :
482 5442 : linechars = (jschar *)cx->malloc_((linelength + 1) * sizeof(jschar));
483 5442 : if (!linechars) {
484 0 : warning = false;
485 0 : goto out;
486 : }
487 5442 : PodCopy(linechars, linebase, linelength);
488 5442 : linechars[linelength] = 0;
489 5442 : linebytes = DeflateString(cx, linechars, linelength);
490 5442 : if (!linebytes) {
491 0 : warning = false;
492 0 : goto out;
493 : }
494 :
495 : /* Unicode and char versions of the offending source line, without final \n */
496 5442 : report.linebuf = linebytes;
497 5442 : report.uclinebuf = linechars;
498 :
499 : /* The lineno check above means we should only see single-line tokens here. */
500 5442 : JS_ASSERT(tp->begin.lineno == tp->end.lineno);
501 5442 : report.tokenptr = report.linebuf + tp->begin.index;
502 5442 : report.uctokenptr = report.uclinebuf + tp->begin.index;
503 : }
504 :
505 : /*
506 : * If there's a runtime exception type associated with this error
507 : * number, set that as the pending exception. For errors occuring at
508 : * compile time, this is very likely to be a JSEXN_SYNTAXERR.
509 : *
510 : * If an exception is thrown but not caught, the JSREPORT_EXCEPTION
511 : * flag will be set in report.flags. Proper behavior for an error
512 : * reporter is to ignore a report with this flag for all but top-level
513 : * compilation errors. The exception will remain pending, and so long
514 : * as the non-top-level "load", "eval", or "compile" native function
515 : * returns false, the top-level reporter will eventually receive the
516 : * uncaught exception report.
517 : */
518 5904 : if (!js_ErrorToException(cx, message, &report, NULL, NULL)) {
519 : /*
520 : * If debugErrorHook is present then we give it a chance to veto
521 : * sending the error on to the regular error reporter.
522 : */
523 5302 : bool reportError = true;
524 5302 : if (JSDebugErrorHook hook = cx->runtime->debugHooks.debugErrorHook)
525 1244 : reportError = hook(cx, message, &report, cx->runtime->debugHooks.debugErrorHookData);
526 :
527 : /* Report the error */
528 5302 : if (reportError && cx->errorReporter)
529 5302 : cx->errorReporter(cx, message, &report);
530 : }
531 :
532 : out:
533 5904 : if (linebytes)
534 5442 : cx->free_(linebytes);
535 5904 : if (linechars)
536 5442 : cx->free_(linechars);
537 5904 : if (message)
538 5904 : cx->free_(message);
539 5904 : if (report.ucmessage)
540 5904 : cx->free_((void *)report.ucmessage);
541 :
542 5904 : if (report.messageArgs) {
543 1508 : if (!(flags & JSREPORT_UC)) {
544 1508 : i = 0;
545 4718 : while (report.messageArgs[i])
546 1702 : cx->free_((void *)report.messageArgs[i++]);
547 : }
548 1508 : cx->free_((void *)report.messageArgs);
549 : }
550 :
551 5904 : return warning;
552 : }
553 :
554 : bool
555 18062 : js::ReportStrictModeError(JSContext *cx, TokenStream *ts, TreeContext *tc, ParseNode *pn,
556 : unsigned errorNumber, ...)
557 : {
558 18062 : JS_ASSERT(ts || tc);
559 18062 : JS_ASSERT(cx == ts->getContext());
560 :
561 : /* In strict mode code, this is an error, not merely a warning. */
562 : unsigned flags;
563 18062 : if ((ts && ts->isStrictMode()) || (tc && (tc->flags & TCF_STRICT_MODE_CODE))) {
564 0 : flags = JSREPORT_ERROR;
565 : } else {
566 18062 : if (!cx->hasStrictOption())
567 14735 : return true;
568 3327 : flags = JSREPORT_WARNING;
569 : }
570 :
571 : va_list ap;
572 3327 : va_start(ap, errorNumber);
573 3327 : bool result = ts->reportCompileErrorNumberVA(pn, flags, errorNumber, ap);
574 3327 : va_end(ap);
575 :
576 3327 : return result;
577 : }
578 :
579 : bool
580 3393 : js::ReportCompileErrorNumber(JSContext *cx, TokenStream *ts, ParseNode *pn, unsigned flags,
581 : unsigned errorNumber, ...)
582 : {
583 : va_list ap;
584 :
585 : /*
586 : * We don't accept a TreeContext argument, so we can't implement
587 : * JSREPORT_STRICT_MODE_ERROR here. Use ReportStrictModeError instead,
588 : * or do the checks in the caller and pass plain old JSREPORT_ERROR.
589 : */
590 3393 : JS_ASSERT(!(flags & JSREPORT_STRICT_MODE_ERROR));
591 :
592 3393 : va_start(ap, errorNumber);
593 3393 : JS_ASSERT(cx == ts->getContext());
594 3393 : bool result = ts->reportCompileErrorNumberVA(pn, flags, errorNumber, ap);
595 3393 : va_end(ap);
596 :
597 3393 : return result;
598 : }
599 :
600 : #if JS_HAS_XML_SUPPORT
601 :
602 : bool
603 0 : TokenStream::getXMLEntity()
604 : {
605 : ptrdiff_t offset, length, i;
606 : int c, d;
607 : JSBool ispair;
608 : jschar *bp, digit;
609 : char *bytes;
610 : JSErrNum msg;
611 :
612 0 : CharBuffer &tb = tokenbuf;
613 :
614 : /* Put the entity, including the '&' already scanned, in tokenbuf. */
615 0 : offset = tb.length();
616 0 : if (!tb.append('&'))
617 0 : return false;
618 0 : while ((c = getChar()) != ';') {
619 0 : if (c == EOF || c == '\n') {
620 0 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_END_OF_XML_ENTITY);
621 0 : return false;
622 : }
623 0 : if (!tb.append(c))
624 0 : return false;
625 : }
626 :
627 : /* Let length be the number of jschars after the '&', including the ';'. */
628 0 : length = tb.length() - offset;
629 0 : bp = tb.begin() + offset;
630 0 : c = d = 0;
631 0 : ispair = false;
632 0 : if (length > 2 && bp[1] == '#') {
633 : /* Match a well-formed XML Character Reference. */
634 0 : i = 2;
635 0 : if (length > 3 && (bp[i] == 'x' || bp[i] == 'X')) {
636 0 : if (length > 9) /* at most 6 hex digits allowed */
637 0 : goto badncr;
638 0 : while (++i < length) {
639 0 : digit = bp[i];
640 0 : if (!JS7_ISHEX(digit))
641 : goto badncr;
642 0 : c = (c << 4) + JS7_UNHEX(digit);
643 : }
644 : } else {
645 0 : while (i < length) {
646 0 : digit = bp[i++];
647 0 : if (!JS7_ISDEC(digit))
648 0 : goto badncr;
649 0 : c = (c * 10) + JS7_UNDEC(digit);
650 0 : if (c < 0)
651 0 : goto badncr;
652 : }
653 : }
654 :
655 0 : if (0x10000 <= c && c <= 0x10FFFF) {
656 : /* Form a surrogate pair (c, d) -- c is the high surrogate. */
657 0 : d = 0xDC00 + (c & 0x3FF);
658 0 : c = 0xD7C0 + (c >> 10);
659 0 : ispair = true;
660 : } else {
661 : /* Enforce the http://www.w3.org/TR/REC-xml/#wf-Legalchar WFC. */
662 0 : if (c != 0x9 && c != 0xA && c != 0xD &&
663 0 : !(0x20 <= c && c <= 0xD7FF) &&
664 0 : !(0xE000 <= c && c <= 0xFFFD)) {
665 : goto badncr;
666 : }
667 : }
668 : } else {
669 : /* Try to match one of the five XML 1.0 predefined entities. */
670 0 : switch (length) {
671 : case 3:
672 0 : if (bp[2] == 't') {
673 0 : if (bp[1] == 'l')
674 0 : c = '<';
675 0 : else if (bp[1] == 'g')
676 0 : c = '>';
677 : }
678 0 : break;
679 : case 4:
680 0 : if (bp[1] == 'a' && bp[2] == 'm' && bp[3] == 'p')
681 0 : c = '&';
682 0 : break;
683 : case 5:
684 0 : if (bp[3] == 'o') {
685 0 : if (bp[1] == 'a' && bp[2] == 'p' && bp[4] == 's')
686 0 : c = '\'';
687 0 : else if (bp[1] == 'q' && bp[2] == 'u' && bp[4] == 't')
688 0 : c = '"';
689 : }
690 0 : break;
691 : }
692 0 : if (c == 0) {
693 0 : msg = JSMSG_UNKNOWN_XML_ENTITY;
694 0 : goto bad;
695 : }
696 : }
697 :
698 : /* If we matched, retract tokenbuf and store the entity's value. */
699 0 : *bp++ = (jschar) c;
700 0 : if (ispair)
701 0 : *bp++ = (jschar) d;
702 0 : tb.shrinkBy(tb.end() - bp);
703 0 : return true;
704 :
705 : badncr:
706 0 : msg = JSMSG_BAD_XML_NCR;
707 : bad:
708 : /* No match: throw a TypeError per ECMA-357 10.3.2.1 step 8(a). */
709 0 : JS_ASSERT((tb.end() - bp) >= 1);
710 0 : bytes = DeflateString(cx, bp + 1, (tb.end() - bp) - 1);
711 0 : if (bytes) {
712 0 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, msg, bytes);
713 0 : cx->free_(bytes);
714 : }
715 0 : return false;
716 : }
717 :
718 : bool
719 10078 : TokenStream::getXMLTextOrTag(TokenKind *ttp, Token **tpp)
720 : {
721 : TokenKind tt;
722 : int c, qc;
723 : Token *tp;
724 : JSAtom *atom;
725 :
726 : /*
727 : * Look for XML text.
728 : */
729 10078 : if (flags & TSF_XMLTEXTMODE) {
730 1884 : tt = TOK_XMLSPACE; /* veto if non-space, return TOK_XMLTEXT */
731 1884 : tp = newToken(0);
732 1884 : tokenbuf.clear();
733 1884 : qc = (flags & TSF_XMLONLYMODE) ? '<' : '{';
734 :
735 3989 : while ((c = getChar()) != qc && c != '<' && c != EOF) {
736 221 : if (c == '&' && qc == '<') {
737 0 : if (!getXMLEntity())
738 0 : goto error;
739 0 : tt = TOK_XMLTEXT;
740 0 : continue;
741 : }
742 :
743 221 : if (!IsXMLSpace(c))
744 221 : tt = TOK_XMLTEXT;
745 221 : if (!tokenbuf.append(c))
746 0 : goto error;
747 : }
748 1884 : ungetChar(c);
749 :
750 1884 : if (tokenbuf.empty()) {
751 1729 : atom = NULL;
752 : } else {
753 155 : atom = atomize(cx, tokenbuf);
754 155 : if (!atom)
755 0 : goto error;
756 : }
757 1884 : tp->pos.end.lineno = lineno;
758 1884 : tp->setAtom(JSOP_STRING, atom);
759 1884 : goto out;
760 : }
761 :
762 : /*
763 : * XML tags.
764 : */
765 : else {
766 8194 : JS_ASSERT(flags & TSF_XMLTAGMODE);
767 8194 : tp = newToken(0);
768 8194 : c = getChar();
769 8194 : if (c != EOF && IsXMLSpace(c)) {
770 641 : do {
771 641 : c = getChar();
772 641 : if (c == EOF)
773 0 : break;
774 641 : } while (IsXMLSpace(c));
775 641 : ungetChar(c);
776 641 : tp->pos.end.lineno = lineno;
777 641 : tt = TOK_XMLSPACE;
778 641 : goto out;
779 : }
780 :
781 7553 : if (c == EOF) {
782 0 : tt = TOK_EOF;
783 0 : goto out;
784 : }
785 :
786 7553 : tokenbuf.clear();
787 7553 : if (IsXMLNamespaceStart(c)) {
788 3438 : JSBool sawColon = JS_FALSE;
789 :
790 3438 : if (!tokenbuf.append(c))
791 0 : goto error;
792 17038 : while ((c = getChar()) != EOF && IsXMLNamePart(c)) {
793 10162 : if (c == ':') {
794 : int nextc;
795 :
796 0 : if (sawColon ||
797 : (nextc = peekChar(),
798 : ((flags & TSF_XMLONLYMODE) || nextc != '{') &&
799 0 : !IsXMLNamePart(nextc))) {
800 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
801 0 : JSMSG_BAD_XML_QNAME);
802 0 : goto error;
803 : }
804 0 : sawColon = JS_TRUE;
805 : }
806 :
807 10162 : if (!tokenbuf.append(c))
808 0 : goto error;
809 : }
810 :
811 3438 : ungetChar(c);
812 3438 : atom = atomize(cx, tokenbuf);
813 3438 : if (!atom)
814 0 : goto error;
815 3438 : tp->setAtom(JSOP_STRING, atom);
816 3438 : tt = TOK_XMLNAME;
817 3438 : goto out;
818 : }
819 :
820 4115 : switch (c) {
821 : case '{':
822 0 : if (flags & TSF_XMLONLYMODE)
823 0 : goto bad_xml_char;
824 0 : tt = TOK_LC;
825 0 : goto out;
826 :
827 : case '=':
828 641 : tt = TOK_ASSIGN;
829 641 : goto out;
830 :
831 : case '"':
832 : case '\'':
833 641 : qc = c;
834 1300 : while ((c = getChar()) != qc) {
835 18 : if (c == EOF) {
836 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
837 0 : JSMSG_UNTERMINATED_STRING);
838 0 : goto error;
839 : }
840 :
841 : /*
842 : * XML attribute values are double-quoted when pretty-printed,
843 : * so escape " if it is expressed directly in a single-quoted
844 : * attribute value.
845 : */
846 18 : if (c == '"' && !(flags & TSF_XMLONLYMODE)) {
847 0 : JS_ASSERT(qc == '\'');
848 0 : if (!tokenbuf.append(js_quot_entity_str,
849 0 : strlen(js_quot_entity_str)))
850 0 : goto error;
851 0 : continue;
852 : }
853 :
854 18 : if (c == '&' && (flags & TSF_XMLONLYMODE)) {
855 0 : if (!getXMLEntity())
856 0 : goto error;
857 0 : continue;
858 : }
859 :
860 18 : if (!tokenbuf.append(c))
861 0 : goto error;
862 : }
863 641 : atom = atomize(cx, tokenbuf);
864 641 : if (!atom)
865 0 : goto error;
866 641 : tp->pos.end.lineno = lineno;
867 641 : tp->setAtom(JSOP_STRING, atom);
868 641 : tt = TOK_XMLATTR;
869 641 : goto out;
870 :
871 : case '>':
872 2356 : tt = TOK_XMLTAGC;
873 2356 : goto out;
874 :
875 : case '/':
876 477 : if (matchChar('>')) {
877 477 : tt = TOK_XMLPTAGC;
878 477 : goto out;
879 : }
880 : /* FALL THROUGH */
881 :
882 : bad_xml_char:
883 : default:
884 0 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_XML_CHARACTER);
885 0 : goto error;
886 : }
887 : JS_NOT_REACHED("getXMLTextOrTag 1");
888 : }
889 : JS_NOT_REACHED("getXMLTextOrTag 2");
890 :
891 : out:
892 10078 : *ttp = tt;
893 10078 : *tpp = tp;
894 10078 : return true;
895 :
896 : error:
897 0 : *ttp = TOK_ERROR;
898 0 : *tpp = tp;
899 0 : return false;
900 : }
901 :
902 : /*
903 : * After much testing, it's clear that Postel's advice to protocol designers
904 : * ("be liberal in what you accept, and conservative in what you send") invites
905 : * a natural-law repercussion for JS as "protocol":
906 : *
907 : * "If you are liberal in what you accept, others will utterly fail to be
908 : * conservative in what they send."
909 : *
910 : * Which means you will get <!-- comments to end of line in the middle of .js
911 : * files, and after if conditions whose then statements are on the next line,
912 : * and other wonders. See at least the following bugs:
913 : * - https://bugzilla.mozilla.org/show_bug.cgi?id=309242
914 : * - https://bugzilla.mozilla.org/show_bug.cgi?id=309712
915 : * - https://bugzilla.mozilla.org/show_bug.cgi?id=310993
916 : *
917 : * So without JSOPTION_XML, we changed around Firefox 1.5 never to scan an XML
918 : * comment or CDATA literal. Instead, we always scan <! as the start of an
919 : * HTML comment hack to end of line, used since Netscape 2 to hide script tag
920 : * content from script-unaware browsers.
921 : *
922 : * But this still leaves XML resources with certain internal structure
923 : * vulnerable to being loaded as script cross-origin, and some internal data
924 : * stolen, so for Firefox 3.5 and beyond, we reject programs whose source
925 : * consists only of XML literals. See:
926 : *
927 : * https://bugzilla.mozilla.org/show_bug.cgi?id=336551
928 : *
929 : * The check for this is in js::frontend::CompileScript.
930 : */
931 : bool
932 2860 : TokenStream::getXMLMarkup(TokenKind *ttp, Token **tpp)
933 : {
934 : TokenKind tt;
935 : int c;
936 2860 : Token *tp = *tpp;
937 : /* On success *ttp receives the token kind and true is returned; on failure *ttp is TOK_ERROR and false is returned. */
938 : /* Check for XML comment or CDATA section. */
939 2860 : if (matchChar('!')) {
940 27 : tokenbuf.clear();
941 : /* tokenbuf accumulates the comment or CDATA text that is atomized below. */
942 : /* Scan XML comment: after "!--", collect text up to "--", which must be followed by '>'. */
943 27 : if (matchChar('-')) {
944 0 : if (!matchChar('-'))
945 0 : goto bad_xml_markup;
946 0 : while ((c = getChar()) != '-' || !matchChar('-')) {
947 0 : if (c == EOF)
948 0 : goto bad_xml_markup;
949 0 : if (!tokenbuf.append(c))
950 0 : goto error;
951 : }
952 0 : if (!matchChar('>'))
953 0 : goto bad_xml_markup;
954 :
955 0 : JSAtom *commentText = atomize(cx, tokenbuf);
956 0 : if (!commentText)
957 0 : goto error;
958 0 : tp->setAtom(JSOP_XMLCOMMENT, commentText);
959 0 : tp->pos.end.lineno = lineno;
960 0 : tt = TOK_XMLCOMMENT;
961 0 : goto out;
962 : }
963 :
964 : /* Scan CDATA section: after "![", require "CDATA[" and collect text up to "]]>". */
965 27 : if (matchChar('[')) {
966 : jschar cp[6];
967 189 : if (peekChars(6, cp) &&
968 27 : cp[0] == 'C' &&
969 27 : cp[1] == 'D' &&
970 27 : cp[2] == 'A' &&
971 27 : cp[3] == 'T' &&
972 27 : cp[4] == 'A' &&
973 27 : cp[5] == '[') {
974 27 : skipChars(6);
975 108 : while ((c = getChar()) != ']' ||
976 27 : !peekChars(2, cp) ||
977 27 : cp[0] != ']' ||
978 27 : cp[1] != '>') {
979 270 : if (c == EOF)
980 0 : goto bad_xml_markup;
981 270 : if (!tokenbuf.append(c))
982 0 : goto error;
983 : }
984 27 : consumeKnownChar(']'); /* the loop only peeked at the trailing "]>" */
985 27 : consumeKnownChar('>');
986 :
987 27 : JSAtom *cdataContent = atomize(cx, tokenbuf);
988 27 : if (!cdataContent)
989 0 : goto error;
990 :
991 27 : tp->setAtom(JSOP_XMLCDATA, cdataContent);
992 27 : tp->pos.end.lineno = lineno;
993 27 : tt = TOK_XMLCDATA;
994 27 : goto out;
995 : }
996 0 : goto bad_xml_markup; /* "![" was not followed by "CDATA[" */
997 : }
998 : }
999 : /* NOTE(review): a '!' followed by neither '-' nor '[' reaches here with the '!' already consumed -- confirm this is intended. */
1000 : /* Check for processing instruction. */
1001 2833 : if (matchChar('?')) {
1002 0 : bool inTarget = true;
1003 0 : size_t targetLength = 0;
1004 0 : ptrdiff_t contentIndex = -1;
1005 : /* targetLength counts the target's chars; contentIndex is the tokenbuf offset of the first non-space char after the target (-1 if none). */
1006 0 : tokenbuf.clear();
1007 0 : while ((c = getChar()) != '?' || peekChar() != '>') {
1008 0 : if (c == EOF)
1009 0 : goto bad_xml_markup;
1010 0 : if (inTarget) {
1011 0 : if (IsXMLSpace(c)) {
1012 0 : if (tokenbuf.empty())
1013 0 : goto bad_xml_markup;
1014 0 : inTarget = false;
1015 : } else { /* first target char is checked with IsXMLNamespaceStart, later ones with IsXMLNamespacePart */
1016 0 : if (!(tokenbuf.empty()
1017 0 : ? IsXMLNamespaceStart(c)
1018 0 : : IsXMLNamespacePart(c))) {
1019 0 : goto bad_xml_markup;
1020 : }
1021 0 : ++targetLength;
1022 : }
1023 : } else {
1024 0 : if (contentIndex < 0 && !IsXMLSpace(c))
1025 0 : contentIndex = tokenbuf.length();
1026 : }
1027 0 : if (!tokenbuf.append(c))
1028 0 : goto error;
1029 : }
1030 0 : if (targetLength == 0)
1031 0 : goto bad_xml_markup;
1032 : /* data is atomized from tokenbuf starting at contentIndex, or is the empty atom when no content was seen. */
1033 : JSAtom *data;
1034 0 : if (contentIndex < 0) {
1035 0 : data = cx->runtime->atomState.emptyAtom;
1036 : } else {
1037 0 : data = js_AtomizeChars(cx, tokenbuf.begin() + contentIndex,
1038 0 : tokenbuf.length() - contentIndex);
1039 0 : if (!data)
1040 0 : goto error;
1041 : }
1042 0 : tokenbuf.shrinkBy(tokenbuf.length() - targetLength); /* presumably leaves only the target chars in tokenbuf -- shrinkBy is defined elsewhere */
1043 0 : consumeKnownChar('>'); /* the '>' of "?>" was only peeked by the loop above */
1044 0 : JSAtom *target = atomize(cx, tokenbuf);
1045 0 : if (!target)
1046 0 : goto error;
1047 0 : tp->setProcessingInstruction(target->asPropertyName(), data);
1048 0 : tp->pos.end.lineno = lineno;
1049 0 : tt = TOK_XMLPI;
1050 0 : goto out;
1051 : }
1052 :
1053 : /* An XML start-of-tag character. */
1054 2833 : tt = matchChar('/') ? TOK_XMLETAGO : TOK_XMLSTAGO;
1055 :
1056 : out:
1057 2860 : *ttp = tt;
1058 2860 : *tpp = tp; /* tp is never reassigned in this function, so this writes back the incoming pointer */
1059 2860 : return true;
1060 :
1061 : bad_xml_markup:
1062 0 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_XML_MARKUP);
1063 : error: /* bad_xml_markup falls through to here after reporting */
1064 0 : *ttp = TOK_ERROR;
1065 0 : *tpp = tp;
1066 0 : return false;
1067 : }
1068 : #endif /* JS_HAS_XML_SUPPORT */
1069 :
1070 : /*
1071 : * We have encountered a '\': check for a Unicode escape sequence after it.
1072 : * Return 'true' and store the character code value in *result if 'u' plus four
1073 : * hex digits follow. Otherwise, return 'false' and leave *result unchanged. In
1074 : * both cases, do not advance along the buffer.
1075 : */
1076 : bool
1077 0 : TokenStream::peekUnicodeEscape(int *result)
1078 : {
1079 : jschar cp[5];
1080 : /* cp[0] must be 'u'; cp[1..4] are the four hex digits, most significant first. */
1081 0 : if (peekChars(5, cp) && cp[0] == 'u' &&
1082 0 : JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
1083 0 : JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
1084 : {
1085 0 : *result = (((((JS7_UNHEX(cp[1]) << 4)
1086 0 : + JS7_UNHEX(cp[2])) << 4)
1087 0 : + JS7_UNHEX(cp[3])) << 4)
1088 0 : + JS7_UNHEX(cp[4]);
1089 0 : return true;
1090 : }
1091 0 : return false;
1092 : }
1093 : /* If a Unicode escape satisfying IsIdentifierStart follows, skip its 5 chars and return true with the code in *cp; *cp may be written even when false is returned. */
1094 : bool
1095 0 : TokenStream::matchUnicodeEscapeIdStart(int32_t *cp)
1096 : {
1097 0 : if (peekUnicodeEscape(cp) && IsIdentifierStart(*cp)) {
1098 0 : skipChars(5); /* 'u' plus the four hex digits */
1099 0 : return true;
1100 : }
1101 0 : return false;
1102 : }
1103 : /* If a Unicode escape satisfying IsIdentifierPart follows, skip its 5 chars and return true with the code in *cp; *cp may be written even when false is returned. */
1104 : bool
1105 0 : TokenStream::matchUnicodeEscapeIdent(int32_t *cp)
1106 : {
1107 0 : if (peekUnicodeEscape(cp) && IsIdentifierPart(*cp)) {
1108 0 : skipChars(5); /* 'u' plus the four hex digits */
1109 0 : return true;
1110 : }
1111 0 : return false;
1112 : }
1113 :
1114 : /*
1115 : * Helper function which returns true if the first length(q) characters in p are
1116 : * the same as the characters in q. q is NUL-terminated; p is not bounds-checked.
1117 : */
1118 : static bool
1119 586133 : CharsMatch(const jschar *p, const char *q) {
1120 1172623 : while (*q) {
1121 586487 : if (*p++ != *q++)
1122 586130 : return false;
1123 : }
1124 3 : return true;
1125 : }
1126 : /* Handle an "@line <number> "<filename>"" directive (filename optional) at the current position; returns false only when JS_strdup fails. */
1127 : bool
1128 2 : TokenStream::getAtLine()
1129 : {
1130 : int c;
1131 : jschar cp[5];
1132 : unsigned i, line, temp;
1133 : char filenameBuf[1024];
1134 :
1135 : /*
1136 : * Hack for source filters such as the Mozilla XUL preprocessor:
1137 : * "//@line 123\n" sets the number of the *next* line after the
1138 : * comment to 123. If we reach here, we've already seen "//".
1139 : */
1140 2 : if (peekChars(5, cp) && CharsMatch(cp, "@line")) {
1141 2 : skipChars(5);
1142 6 : while ((c = getChar()) != '\n' && c != EOF && IsSpaceOrBOM2(c)) /* skip spaces between "@line" and the number */
1143 2 : continue;
1144 2 : if (JS7_ISDEC(c)) {
1145 2 : line = JS7_UNDEC(c);
1146 8 : while ((c = getChar()) != EOF && JS7_ISDEC(c)) {
1147 4 : temp = 10 * line + JS7_UNDEC(c);
1148 4 : if (temp < line) { /* NOTE(review): unsigned wraparound does not always yield temp < line, so some overflows may go undetected -- confirm */
1149 : /* Ignore overlarge line numbers. */
1150 0 : return true;
1151 : }
1152 4 : line = temp;
1153 : }
1154 6 : while (c != '\n' && c != EOF && IsSpaceOrBOM2(c)) /* skip spaces between the number and an optional filename */
1155 2 : c = getChar();
1156 2 : i = 0; /* i is the number of filename chars stored so far */
1157 2 : if (c == '"') {
1158 10 : while ((c = getChar()) != EOF && c != '"') {
1159 6 : if (c == '\n') {
1160 0 : ungetChar(c);
1161 0 : return true;
1162 : }
1163 6 : if ((c >> 8) != 0 || i >= sizeof filenameBuf - 1) /* give up on chars wider than 8 bits or when only room for the NUL remains */
1164 0 : return true;
1165 6 : filenameBuf[i++] = (char) c;
1166 : }
1167 2 : if (c == '"') {
1168 4 : while ((c = getChar()) != '\n' && c != EOF && IsSpaceOrBOM2(c)) /* skip spaces after the closing quote */
1169 0 : continue;
1170 : }
1171 : }
1172 2 : filenameBuf[i] = '\0';
1173 2 : if (c == EOF || c == '\n') { /* only apply the directive if nothing else follows it on the line */
1174 2 : if (i > 0) {
1175 2 : if (flags & TSF_OWNFILENAME) /* release a filename this stream allocated earlier */
1176 1 : cx->free_((void *) filename);
1177 2 : filename = JS_strdup(cx, filenameBuf);
1178 2 : if (!filename)
1179 0 : return false;
1180 2 : flags |= TSF_OWNFILENAME;
1181 : }
1182 2 : lineno = line;
1183 : }
1184 : }
1185 2 : ungetChar(c); /* put back the last char read so the caller sees it */
1186 : }
1187 2 : return true;
1188 : }
1189 : /* If "@sourceMappingURL=<url>" follows, copy <url> into a freshly allocated, NUL-terminated sourceMap; returns false only if that allocation fails. */
1190 : bool
1191 636281 : TokenStream::getAtSourceMappingURL()
1192 : {
1193 : jschar peeked[18];
1194 : /* 18 is the length of "@sourceMappingURL=". */
1195 : /* Match comments of the form @sourceMappingURL=<url> */
1196 636281 : if (peekChars(18, peeked) && CharsMatch(peeked, "@sourceMappingURL=")) {
1197 1 : skipChars(18);
1198 1 : tokenbuf.clear();
1199 : /* Collect the URL up to the first space/BOM char, NUL or EOF; that terminating char is consumed too. */
1200 : jschar c;
1201 44 : while (!IsSpaceOrBOM2((c = getChar())) &&
1202 : c && c != jschar(EOF))
1203 42 : tokenbuf.append(c); /* NOTE(review): append() result is ignored here, unlike the checked appends elsewhere in this file */
1204 :
1205 1 : if (tokenbuf.empty())
1206 : /* The source map's URL was missing, but not quite an exception that
1207 : * we should stop and drop everything for, though. */
1208 0 : return true;
1209 :
1210 1 : int len = tokenbuf.length();
1211 : /* Replace any sourceMap recorded by an earlier comment. */
1212 1 : if (sourceMap)
1213 0 : cx->free_(sourceMap);
1214 1 : sourceMap = (jschar *) cx->malloc_(sizeof(jschar) * (len + 1)); /* +1 for the terminating NUL */
1215 1 : if (!sourceMap)
1216 0 : return false;
1217 :
1218 43 : for (int i = 0; i < len; i++)
1219 42 : sourceMap[i] = tokenbuf[i];
1220 1 : sourceMap[len] = '\0';
1221 : }
1222 636281 : return true;
1223 : }
1224 : /* Advance cursor to the next slot of tokens[] (wrapping via ntokensMask) and initialise that Token's start position; returns the Token. */
1225 : Token *
1226 116005818 : TokenStream::newToken(ptrdiff_t adjust)
1227 : {
1228 116005818 : cursor = (cursor + 1) & ntokensMask;
1229 116005818 : Token *tp = &tokens[cursor];
1230 116005818 : tp->ptr = userbuf.addressOfNextRawChar() + adjust; /* adjust is added to the next-raw-char address; callers in this file pass 0 or -1 */
1231 116005818 : tp->pos.begin.index = tp->ptr - linebase;
1232 116005818 : tp->pos.begin.lineno = tp->pos.end.lineno = lineno; /* end.lineno is overwritten later for tokens that span lines */
1233 116005818 : return tp;
1234 : }
1235 : /* Atomize the full contents of cb with js_AtomizeChars and return its result. */
1236 : JS_ALWAYS_INLINE JSAtom *
1237 10473681 : TokenStream::atomize(JSContext *cx, CharBuffer &cb)
1238 : {
1239 10473681 : return js_AtomizeChars(cx, cb.begin(), cb.length());
1240 : }
1241 : /* Debug-only consistency check of a token's type and position; returns false if anything looks wrong. */
1242 : #ifdef DEBUG
1243 : bool
1244 115067179 : IsTokenSane(Token *tp)
1245 : {
1246 : /*
1247 : * Nb: TOK_EOL should never be used in an actual Token; it should only be
1248 : * returned as a TokenKind from peekTokenSameLine().
1249 : */
1250 115067179 : if (tp->type < TOK_ERROR || tp->type >= TOK_LIMIT || tp->type == TOK_EOL)
1251 0 : return false;
1252 : /* A token on a single line must not end before it begins. */
1253 115067179 : if (tp->pos.begin.lineno == tp->pos.end.lineno) {
1254 115065765 : if (tp->pos.begin.index > tp->pos.end.index)
1255 0 : return false;
1256 : } else {
1257 : /* Only certain token kinds can be multi-line. */
1258 1414 : switch (tp->type) {
1259 : case TOK_STRING:
1260 : case TOK_XMLATTR:
1261 : case TOK_XMLSPACE:
1262 : case TOK_XMLTEXT:
1263 : case TOK_XMLCOMMENT:
1264 : case TOK_XMLCDATA:
1265 : case TOK_XMLPI:
1266 1414 : break;
1267 : default:
1268 0 : return false;
1269 : }
1270 : }
1271 115067179 : return true;
1272 : }
1273 : #endif
1274 : /* Re-scan the identifier starting at identStart into tokenbuf, replacing Unicode escapes by their code values; returns false if an append fails. */
1275 : bool
1276 0 : TokenStream::putIdentInTokenbuf(const jschar *identStart)
1277 : {
1278 : int32_t c, qc;
1279 0 : const jschar *tmp = userbuf.addressOfNextRawChar(); /* saved so the read position can be restored on every exit path */
1280 0 : userbuf.setAddressOfNextRawChar(identStart);
1281 :
1282 0 : tokenbuf.clear();
1283 0 : for (;;) {
1284 0 : c = getCharIgnoreEOL();
1285 0 : if (!IsIdentifierPart(c)) {
1286 0 : if (c != '\\' || !matchUnicodeEscapeIdent(&qc)) /* anything other than an identifier-part escape ends the identifier */
1287 : break;
1288 0 : c = qc;
1289 : }
1290 0 : if (!tokenbuf.append(c)) {
1291 0 : userbuf.setAddressOfNextRawChar(tmp);
1292 0 : return false;
1293 : }
1294 : }
1295 0 : userbuf.setAddressOfNextRawChar(tmp);
1296 0 : return true;
1297 : }
1298 : /* Look up s[0..length) as a keyword: sets *ttp/*topp for a usable keyword, reports reserved words, and returns true untouched for non-keywords. */
1299 : bool
1300 24684089 : TokenStream::checkForKeyword(const jschar *s, size_t length, TokenKind *ttp, JSOp *topp)
1301 : {
1302 24684089 : JS_ASSERT(!ttp == !topp); /* ttp and topp must be both null or both non-null */
1303 :
1304 24684089 : const KeywordInfo *kw = FindKeyword(s, length);
1305 24684089 : if (!kw)
1306 16937389 : return true;
1307 : /* From here on, an error path returns whatever the reporting function returns. */
1308 7746700 : if (kw->tokentype == TOK_RESERVED) {
1309 : return ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1310 0 : JSMSG_RESERVED_ID, kw->chars);
1311 : }
1312 :
1313 7746700 : if (kw->tokentype != TOK_STRICT_RESERVED) {
1314 7746613 : if (kw->version <= versionNumber()) {
1315 : /* Working keyword. */
1316 7746612 : if (ttp) {
1317 7746612 : *ttp = kw->tokentype;
1318 7746612 : *topp = (JSOp) kw->op;
1319 7746612 : return true;
1320 : }
1321 : return ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, /* ttp is null: a working keyword is reported as a reserved identifier */
1322 0 : JSMSG_RESERVED_ID, kw->chars);
1323 : }
1324 :
1325 : /* The let keyword is reserved on <1.7 */
1326 1 : if (kw->tokentype == TOK_LET) {
1327 : return ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1328 1 : JSMSG_RESERVED_ID, kw->chars);
1329 : }
1330 :
1331 : /*
1332 : * The keyword is not in this version. Treat it as an identifier,
1333 : * unless it is yield which we treat as TOK_STRICT_RESERVED by
1334 : * falling through to the code below (ES5 forbids it in strict mode).
1335 : */
1336 0 : if (kw->tokentype != TOK_YIELD)
1337 0 : return true;
1338 : }
1339 :
1340 : /* Strict reserved word: ReportStrictModeError in strict mode, otherwise a JSREPORT_STRICT warning. */
1341 87 : if (isStrictMode())
1342 0 : return ReportStrictModeError(cx, this, NULL, NULL, JSMSG_RESERVED_ID, kw->chars);
1343 : return ReportCompileErrorNumber(cx, this, NULL, JSREPORT_STRICT | JSREPORT_WARNING,
1344 87 : JSMSG_RESERVED_ID, kw->chars);
1345 : }
1346 : /* Classification of a token's first character; the values are stored in the firstCharKinds[] table. */
1347 : enum FirstCharKind {
1348 : Other,
1349 : OneChar,
1350 : Ident,
1351 : Dot,
1352 : Equals,
1353 : String,
1354 : Dec,
1355 : Colon,
1356 : Plus,
1357 : HexOct,
1358 :
1359 : /* These two must be last, so that |c >= Space| matches both. */
1360 : Space,
1361 : EOL
1362 : };
1363 :
1364 : #define _______ Other
1365 : /* firstCharKinds has 128 entries, one per character code 0..127; _______ is a short alias for Other used to keep the table aligned. */
1366 : /*
1367 : * OneChar: 40, 41, 44, 59, 63, 91, 93, 123, 125: '(', ')', ',', ';', '?', '[', ']', '{', '}'
1368 : * Ident: 36, 65..90, 95, 97..122: '$', 'A'..'Z', '_', 'a'..'z'
1369 : * Dot: 46: '.'
1370 : * Equals: 61: '='
1371 : * String: 34, 39: '"', '\''
1372 : * Dec: 49..57: '1'..'9'
1373 : * Colon: 58: ':'
1374 : * Plus: 43: '+'
1375 : * HexOct: 48: '0'
1376 : * Space: 9, 11, 12, 32: '\t', '\v', '\f', ' '
1377 : * EOL: 10, 13: '\n', '\r'
1378 : */
1379 : static const uint8_t firstCharKinds[] = {
1380 : /* 0 1 2 3 4 5 6 7 8 9 */
1381 : /* 0+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, Space,
1382 : /* 10+ */ EOL, Space, Space, EOL, _______, _______, _______, _______, _______, _______,
1383 : /* 20+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
1384 : /* 30+ */ _______, _______, Space, _______, String, _______, Ident, _______, _______, String,
1385 : /* 40+ */ OneChar, OneChar, _______, Plus, OneChar, _______, Dot, _______, HexOct, Dec,
1386 : /* 50+ */ Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, Colon, OneChar,
1387 : /* 60+ */ _______, Equals, _______, OneChar, _______, Ident, Ident, Ident, Ident, Ident,
1388 : /* 70+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
1389 : /* 80+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
1390 : /* 90+ */ Ident, OneChar, _______, OneChar, _______, Ident, _______, Ident, Ident, Ident,
1391 : /* 100+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
1392 : /* 110+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
1393 : /* 120+ */ Ident, Ident, Ident, OneChar, _______, OneChar, _______, _______
1394 : };
1395 :
1396 : #undef _______
1397 :
1398 : TokenKind
1399 115067179 : TokenStream::getTokenInternal()
1400 : {
1401 : TokenKind tt;
1402 : int c, qc;
1403 : Token *tp;
1404 : FirstCharKind c1kind;
1405 : const jschar *numStart;
1406 : bool hasFracOrExp;
1407 : const jschar *identStart;
1408 : bool hadUnicodeEscape;
1409 :
1410 : #if JS_HAS_XML_SUPPORT
1411 : /*
1412 : * Look for XML text and tags.
1413 : */
1414 115067179 : if (flags & (TSF_XMLTEXTMODE|TSF_XMLTAGMODE)) {
1415 10078 : if (!getXMLTextOrTag(&tt, &tp))
1416 0 : goto error;
1417 10078 : goto out;
1418 : }
1419 : #endif
1420 :
1421 : retry:
1422 228093672 : if (JS_UNLIKELY(!userbuf.hasRawChars())) {
1423 171663 : tp = newToken(0);
1424 171663 : tt = TOK_EOF;
1425 171663 : flags |= TSF_EOF;
1426 171663 : goto out;
1427 : }
1428 :
1429 227922009 : c = userbuf.getRawChar();
1430 227922009 : JS_ASSERT(c != EOF);
1431 :
1432 : /*
1433 : * Chars not in the range 0..127 are rare. Getting them out of the way
1434 : * early allows subsequent checking to be faster.
1435 : */
1436 227922009 : if (JS_UNLIKELY(c >= 128)) {
1437 1 : if (IsSpaceOrBOM2(c)) {
1438 1 : if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) {
1439 0 : updateLineInfoForEOL();
1440 0 : updateFlagsForEOL();
1441 : }
1442 :
1443 1 : goto retry;
1444 : }
1445 :
1446 0 : tp = newToken(-1);
1447 :
1448 : /* '$' and '_' don't pass IsLetter, but they're < 128 so never appear here. */
1449 : JS_STATIC_ASSERT('$' < 128 && '_' < 128);
1450 0 : if (IsLetter(c)) {
1451 0 : identStart = userbuf.addressOfNextRawChar() - 1;
1452 0 : hadUnicodeEscape = false;
1453 0 : goto identifier;
1454 : }
1455 :
1456 0 : goto badchar;
1457 : }
1458 :
1459 : /*
1460 : * Get the token kind, based on the first char. The ordering of c1kind
1461 : * comparison is based on the frequency of tokens in real code. Minified
1462 : * and non-minified code have different characteristics, mostly in that
1463 : * whitespace occurs much less in minified code. Token kinds that fall in
1464 : * the 'Other' category typically account for less than 2% of all tokens,
1465 : * so their order doesn't matter much.
1466 : */
1467 227922008 : c1kind = FirstCharKind(firstCharKinds[c]);
1468 :
1469 : /*
1470 : * Skip over whitespace chars; update line state on EOLs. Even though
1471 : * whitespace isn't very common in minified code we have to handle it first
1472 : * (and jump back to 'retry') before calling newToken().
1473 : */
1474 227922008 : if (c1kind >= Space) {
1475 112097931 : if (c1kind == EOL) {
1476 : /* If it's a \r\n sequence: treat as a single EOL, skip over the \n. */
1477 15998690 : if (c == '\r' && userbuf.hasRawChars())
1478 39556 : userbuf.matchRawChar('\n');
1479 15998690 : updateLineInfoForEOL();
1480 15998690 : updateFlagsForEOL();
1481 : }
1482 112097931 : goto retry;
1483 : }
1484 :
1485 115824077 : tp = newToken(-1);
1486 :
1487 : /*
1488 : * Look for an unambiguous single-char token.
1489 : */
1490 115824077 : if (c1kind == OneChar) {
1491 48771495 : tt = (TokenKind)oneCharTokens[c];
1492 48771495 : goto out;
1493 : }
1494 :
1495 : /*
1496 : * Look for an identifier.
1497 : */
1498 67052582 : if (c1kind == Ident) {
1499 32565731 : identStart = userbuf.addressOfNextRawChar() - 1;
1500 32565731 : hadUnicodeEscape = false;
1501 :
1502 : identifier:
1503 209339846 : for (;;) {
1504 241905577 : c = getCharIgnoreEOL();
1505 241905577 : if (c == EOF)
1506 11304 : break;
1507 241894273 : if (!IsIdentifierPart(c)) {
1508 32554427 : if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
1509 32554427 : break;
1510 0 : hadUnicodeEscape = true;
1511 : }
1512 : }
1513 32565731 : ungetCharIgnoreEOL(c);
1514 :
1515 : /* Convert the escapes by putting into tokenbuf. */
1516 32565731 : if (hadUnicodeEscape && !putIdentInTokenbuf(identStart))
1517 0 : goto error;
1518 :
1519 : /* Check for keywords unless parser asks us to ignore keywords. */
1520 32565731 : if (!(flags & TSF_KEYWORD_IS_NAME)) {
1521 : const jschar *chars;
1522 : size_t length;
1523 24683891 : if (hadUnicodeEscape) {
1524 0 : chars = tokenbuf.begin();
1525 0 : length = tokenbuf.length();
1526 : } else {
1527 24683891 : chars = identStart;
1528 24683891 : length = userbuf.addressOfNextRawChar() - identStart;
1529 : }
1530 24683891 : tt = TOK_NAME;
1531 24683891 : if (!checkForKeyword(chars, length, &tt, &tp->t_op))
1532 1 : goto error;
1533 24683890 : if (tt != TOK_NAME)
1534 7746612 : goto out;
1535 : }
1536 :
1537 : /*
1538 : * Identifiers containing no Unicode escapes can be atomized directly
1539 : * from userbuf. The rest must use the escapes converted via
1540 : * tokenbuf before atomizing.
1541 : */
1542 : JSAtom *atom;
1543 24819118 : if (!hadUnicodeEscape)
1544 24819118 : atom = js_AtomizeChars(cx, identStart, userbuf.addressOfNextRawChar() - identStart);
1545 : else
1546 0 : atom = atomize(cx, tokenbuf);
1547 24819118 : if (!atom)
1548 0 : goto error;
1549 24819118 : tp->setName(JSOP_NAME, atom->asPropertyName());
1550 24819118 : tt = TOK_NAME;
1551 24819118 : goto out;
1552 : }
1553 :
1554 34486851 : if (c1kind == Dot) {
1555 6483606 : c = getCharIgnoreEOL();
1556 6483606 : if (JS7_ISDEC(c)) {
1557 201 : numStart = userbuf.addressOfNextRawChar() - 2;
1558 201 : goto decimal_dot;
1559 : }
1560 : #if JS_HAS_XML_SUPPORT
1561 6483405 : if (c == '.') {
1562 9 : tt = TOK_DBLDOT;
1563 9 : goto out;
1564 : }
1565 : #endif
1566 6483396 : ungetCharIgnoreEOL(c);
1567 6483396 : tt = TOK_DOT;
1568 6483396 : goto out;
1569 : }
1570 :
1571 28003245 : if (c1kind == Equals) {
1572 4448141 : if (matchChar('=')) {
1573 258605 : if (matchChar('=')) {
1574 41533 : tp->t_op = JSOP_STRICTEQ;
1575 41533 : tt = TOK_STRICTEQ;
1576 : } else {
1577 217072 : tp->t_op = JSOP_EQ;
1578 217072 : tt = TOK_EQ;
1579 : }
1580 : } else {
1581 4189536 : tp->t_op = JSOP_NOP;
1582 4189536 : tt = TOK_ASSIGN;
1583 : }
1584 4448141 : goto out;
1585 : }
1586 :
1587 : /*
1588 : * Look for a string.
1589 : */
1590 23555104 : if (c1kind == String) {
1591 10469420 : qc = c;
1592 10469420 : tokenbuf.clear();
1593 591889680 : while (true) {
1594 : /*
1595 : * We need to detect any of these chars: " or ', \n (or its
1596 : * equivalents), \\, EOF. We use maybeStrSpecial[] in a manner
1597 : * similar to maybeEOL[], see above. Because we detect EOL
1598 : * sequences here and put them back immediately, we can use
1599 : * getCharIgnoreEOL().
1600 : */
1601 602359100 : c = getCharIgnoreEOL();
1602 602359100 : if (maybeStrSpecial[c & 0xff]) {
1603 42011207 : if (c == qc)
1604 : break;
1605 31541787 : if (c == '\\') {
1606 27699393 : switch (c = getChar()) {
1607 908 : case 'b': c = '\b'; break;
1608 926 : case 'f': c = '\f'; break;
1609 212666 : case 'n': c = '\n'; break;
1610 7848 : case 'r': c = '\r'; break;
1611 6160 : case 't': c = '\t'; break;
1612 908 : case 'v': c = '\v'; break;
1613 :
1614 : default:
1615 27469977 : if ('0' <= c && c < '8') {
1616 775 : int32_t val = JS7_UNDEC(c);
1617 :
1618 775 : c = peekChar();
1619 : /* Strict mode code allows only \0, then a non-digit. */
1620 775 : if (val != 0 || JS7_ISDEC(c)) {
1621 703 : if (!ReportStrictModeError(cx, this, NULL, NULL,
1622 703 : JSMSG_DEPRECATED_OCTAL)) {
1623 0 : goto error;
1624 : }
1625 703 : setOctalCharacterEscape();
1626 : }
1627 775 : if ('0' <= c && c < '8') {
1628 703 : val = 8 * val + JS7_UNDEC(c);
1629 703 : getChar();
1630 703 : c = peekChar();
1631 703 : if ('0' <= c && c < '8') {
1632 703 : int32_t save = val;
1633 703 : val = 8 * val + JS7_UNDEC(c);
1634 703 : if (val <= 0377)
1635 703 : getChar();
1636 : else
1637 0 : val = save;
1638 : }
1639 : }
1640 :
1641 775 : c = (jschar)val;
1642 27469202 : } else if (c == 'u') {
1643 : jschar cp[4];
1644 796428 : if (peekChars(4, cp) &&
1645 353968 : JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
1646 353968 : JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
1647 176984 : c = (((((JS7_UNHEX(cp[0]) << 4)
1648 176984 : + JS7_UNHEX(cp[1])) << 4)
1649 176984 : + JS7_UNHEX(cp[2])) << 4)
1650 530952 : + JS7_UNHEX(cp[3]);
1651 88492 : skipChars(4);
1652 : } else {
1653 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1654 0 : JSMSG_MALFORMED_ESCAPE, "Unicode");
1655 0 : goto error;
1656 : }
1657 27380710 : } else if (c == 'x') {
1658 : jschar cp[2];
1659 61960 : if (peekChars(2, cp) &&
1660 49568 : JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
1661 12392 : c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
1662 12392 : skipChars(2);
1663 : } else {
1664 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1665 0 : JSMSG_MALFORMED_ESCAPE, "hexadecimal");
1666 0 : goto error;
1667 : }
1668 27368318 : } else if (c == '\n') {
1669 : /*
1670 : * ES5 7.8.4: an escaped line terminator represents
1671 : * no character.
1672 : */
1673 23361 : continue;
1674 : }
1675 27446616 : break;
1676 : }
1677 3842394 : } else if (TokenBuf::isRawEOLChar(c) || c == EOF) {
1678 0 : ungetCharIgnoreEOL(c);
1679 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1680 0 : JSMSG_UNTERMINATED_STRING);
1681 0 : goto error;
1682 : }
1683 : }
1684 591866319 : if (!tokenbuf.append(c))
1685 0 : goto error;
1686 : }
1687 10469420 : JSAtom *atom = atomize(cx, tokenbuf);
1688 10469420 : if (!atom)
1689 0 : goto error;
1690 10469420 : tp->pos.end.lineno = lineno;
1691 10469420 : tp->setAtom(JSOP_STRING, atom);
1692 10469420 : tt = TOK_STRING;
1693 10469420 : goto out;
1694 : }
1695 :
1696 : /*
1697 : * Look for a decimal number.
1698 : */
1699 13085684 : if (c1kind == Dec) {
1700 3882242 : numStart = userbuf.addressOfNextRawChar() - 1;
1701 :
1702 : decimal:
1703 5872072 : hasFracOrExp = false;
1704 23439712 : while (JS7_ISDEC(c))
1705 11695568 : c = getCharIgnoreEOL();
1706 :
1707 5872072 : if (c == '.') {
1708 : decimal_dot:
1709 6338 : hasFracOrExp = true;
1710 28015 : do {
1711 28015 : c = getCharIgnoreEOL();
1712 : } while (JS7_ISDEC(c));
1713 : }
1714 5872273 : if (c == 'e' || c == 'E') {
1715 1166 : hasFracOrExp = true;
1716 1166 : c = getCharIgnoreEOL();
1717 1166 : if (c == '+' || c == '-')
1718 402 : c = getCharIgnoreEOL();
1719 1166 : if (!JS7_ISDEC(c)) {
1720 0 : ungetCharIgnoreEOL(c);
1721 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1722 0 : JSMSG_MISSING_EXPONENT);
1723 0 : goto error;
1724 : }
1725 2254 : do {
1726 2254 : c = getCharIgnoreEOL();
1727 : } while (JS7_ISDEC(c));
1728 : }
1729 5872273 : ungetCharIgnoreEOL(c);
1730 :
1731 5872273 : if (c != EOF && IsIdentifierStart(c)) {
1732 0 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_IDSTART_AFTER_NUMBER);
1733 0 : goto error;
1734 : }
1735 :
1736 : /*
1737 : * Unlike identifiers and strings, numbers cannot contain escaped
1738 : * chars, so we don't need to use tokenbuf. Instead we can just
1739 : * convert the jschars in userbuf directly to the numeric value.
1740 : */
1741 : double dval;
1742 : const jschar *dummy;
1743 5872273 : if (!hasFracOrExp) {
1744 5865248 : if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), 10, &dummy, &dval))
1745 0 : goto error;
1746 : } else {
1747 7025 : if (!js_strtod(cx, numStart, userbuf.addressOfNextRawChar(), &dummy, &dval))
1748 0 : goto error;
1749 : }
1750 5872273 : tp->setNumber(dval);
1751 5872273 : tt = TOK_NUMBER;
1752 5872273 : goto out;
1753 : }
1754 :
1755 9203442 : if (c1kind == Colon) {
1756 : #if JS_HAS_XML_SUPPORT
1757 961850 : if (matchChar(':')) {
1758 72 : tt = TOK_DBLCOLON;
1759 72 : goto out;
1760 : }
1761 : #endif
1762 961778 : tp->t_op = JSOP_NOP;
1763 961778 : tt = TOK_COLON;
1764 961778 : goto out;
1765 : }
1766 :
1767 8241592 : if (c1kind == Plus) {
1768 4084032 : if (matchChar('=')) {
1769 66622 : tp->t_op = JSOP_ADD;
1770 66622 : tt = TOK_ADDASSIGN;
1771 4017410 : } else if (matchChar('+')) {
1772 1724810 : tt = TOK_INC;
1773 : } else {
1774 2292600 : tp->t_op = JSOP_POS;
1775 2292600 : tt = TOK_PLUS;
1776 : }
1777 4084032 : goto out;
1778 : }
1779 :
1780 : /*
1781 : * Look for a hexadecimal or octal number.
1782 : */
1783 4157560 : if (c1kind == HexOct) {
1784 : int radix;
1785 2015332 : c = getCharIgnoreEOL();
1786 2015332 : if (c == 'x' || c == 'X') {
1787 19971 : radix = 16;
1788 19971 : c = getCharIgnoreEOL();
1789 19971 : if (!JS7_ISHEX(c)) {
1790 0 : ungetCharIgnoreEOL(c);
1791 0 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_MISSING_HEXDIGITS);
1792 0 : goto error;
1793 : }
1794 19971 : numStart = userbuf.addressOfNextRawChar() - 1; /* one past the '0x' */
1795 99800 : while (JS7_ISHEX(c))
1796 59858 : c = getCharIgnoreEOL();
1797 1995361 : } else if (JS7_ISDEC(c)) {
1798 5531 : radix = 8;
1799 5531 : numStart = userbuf.addressOfNextRawChar() - 1; /* one past the '0' */
1800 27655 : while (JS7_ISDEC(c)) {
1801 : /* Octal integer literals are not permitted in strict mode code. */
1802 16593 : if (!ReportStrictModeError(cx, this, NULL, NULL, JSMSG_DEPRECATED_OCTAL))
1803 0 : goto error;
1804 :
1805 : /*
1806 : * Outside strict mode, we permit 08 and 09 as decimal numbers,
1807 : * which makes our behaviour a superset of the ECMA numeric
1808 : * grammar. We might not always be so permissive, so we warn
1809 : * about it.
1810 : */
1811 16593 : if (c >= '8') {
1812 0 : if (!ReportCompileErrorNumber(cx, this, NULL, JSREPORT_WARNING,
1813 0 : JSMSG_BAD_OCTAL, c == '8' ? "08" : "09")) {
1814 0 : goto error;
1815 : }
1816 0 : goto decimal; /* use the decimal scanner for the rest of the number */
1817 : }
1818 16593 : c = getCharIgnoreEOL();
1819 : }
1820 : } else {
1821 : /* '0' not followed by 'x', 'X' or a digit; scan as a decimal number. */
1822 1989830 : numStart = userbuf.addressOfNextRawChar() - 1;
1823 1989830 : goto decimal;
1824 : }
1825 25502 : ungetCharIgnoreEOL(c);
1826 :
1827 25502 : if (c != EOF && IsIdentifierStart(c)) {
1828 0 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_IDSTART_AFTER_NUMBER);
1829 0 : goto error;
1830 : }
1831 :
1832 : double dval;
1833 : const jschar *dummy;
1834 25502 : if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), radix, &dummy, &dval))
1835 0 : goto error;
1836 25502 : tp->setNumber(dval);
1837 25502 : tt = TOK_NUMBER;
1838 25502 : goto out;
1839 : }
1840 :
1841 : /*
1842 : * This handles everything else.
1843 : */
1844 2142228 : JS_ASSERT(c1kind == Other);
1845 2142228 : switch (c) {
1846 : case '\\':
1847 0 : hadUnicodeEscape = matchUnicodeEscapeIdStart(&qc);
1848 0 : if (hadUnicodeEscape) {
1849 0 : identStart = userbuf.addressOfNextRawChar() - 6;
1850 0 : goto identifier;
1851 : }
1852 0 : goto badchar;
1853 :
1854 : case '|':
1855 162520 : if (matchChar(c)) {
1856 120012 : tt = TOK_OR;
1857 42508 : } else if (matchChar('=')) {
1858 12310 : tp->t_op = JSOP_BITOR;
1859 12310 : tt = TOK_BITORASSIGN;
1860 : } else {
1861 30198 : tt = TOK_BITOR;
1862 : }
1863 162520 : break;
1864 :
1865 : case '^':
1866 2650 : if (matchChar('=')) {
1867 54 : tp->t_op = JSOP_BITXOR;
1868 54 : tt = TOK_BITXORASSIGN;
1869 : } else {
1870 2596 : tt = TOK_BITXOR;
1871 : }
1872 2650 : break;
1873 :
1874 : case '&':
1875 163067 : if (matchChar('&')) {
1876 152222 : tt = TOK_AND;
1877 10845 : } else if (matchChar('=')) {
1878 186 : tp->t_op = JSOP_BITAND;
1879 186 : tt = TOK_BITANDASSIGN;
1880 : } else {
1881 10659 : tt = TOK_BITAND;
1882 : }
1883 163067 : break;
1884 :
1885 : case '!':
1886 513516 : if (matchChar('=')) {
1887 132783 : if (matchChar('=')) {
1888 21138 : tp->t_op = JSOP_STRICTNE;
1889 21138 : tt = TOK_STRICTNE;
1890 : } else {
1891 111645 : tp->t_op = JSOP_NE;
1892 111645 : tt = TOK_NE;
1893 : }
1894 : } else {
1895 380733 : tp->t_op = JSOP_NOT;
1896 380733 : tt = TOK_NOT;
1897 : }
1898 513516 : break;
1899 :
1900 : #if JS_HAS_XML_SUPPORT
1901 : case '@':
1902 18 : tt = TOK_AT;
1903 18 : break;
1904 : #endif
1905 :
1906 : case '<':
1907 : #if JS_HAS_XML_SUPPORT
1908 108287 : if ((flags & TSF_OPERAND) && !isStrictMode() && (hasXML() || peekChar() != '!')) {
1909 2860 : if (!getXMLMarkup(&tt, &tp))
1910 0 : goto error;
1911 2860 : goto out;
1912 : }
1913 : #endif
1914 :
1915 : /* NB: treat HTML begin-comment as comment-till-end-of-line */
1916 105427 : if (matchChar('!')) {
1917 9 : if (matchChar('-')) {
1918 0 : if (matchChar('-')) {
1919 0 : flags |= TSF_IN_HTML_COMMENT;
1920 0 : goto skipline;
1921 : }
1922 0 : ungetChar('-');
1923 : }
1924 9 : ungetChar('!');
1925 : }
1926 105427 : if (matchChar('<')) {
1927 2704 : tp->t_op = JSOP_LSH;
1928 2704 : tt = matchChar('=') ? TOK_LSHASSIGN : TOK_LSH;
1929 : } else {
1930 102723 : if (matchChar('=')) {
1931 9857 : tp->t_op = JSOP_LE;
1932 9857 : tt = TOK_LE;
1933 : } else {
1934 92866 : tp->t_op = JSOP_LT;
1935 92866 : tt = TOK_LT;
1936 : }
1937 : }
1938 105427 : break;
1939 :
1940 : case '>':
1941 61915 : if (matchChar('>')) {
1942 5441 : if (matchChar('>')) {
1943 1428 : tp->t_op = JSOP_URSH;
1944 1428 : tt = matchChar('=') ? TOK_URSHASSIGN : TOK_URSH;
1945 : } else {
1946 4013 : tp->t_op = JSOP_RSH;
1947 4013 : tt = matchChar('=') ? TOK_RSHASSIGN : TOK_RSH;
1948 : }
1949 : } else {
1950 56474 : if (matchChar('=')) {
1951 15650 : tp->t_op = JSOP_GE;
1952 15650 : tt = TOK_GE;
1953 : } else {
1954 40824 : tp->t_op = JSOP_GT;
1955 40824 : tt = TOK_GT;
1956 : }
1957 : }
1958 61915 : break;
1959 :
1960 : case '*':
1961 10928 : tp->t_op = JSOP_MUL;
1962 10928 : tt = matchChar('=') ? TOK_MULASSIGN : TOK_STAR;
1963 10928 : break;
1964 :
1965 : case '/':
1966 : /*
1967 : * Look for a single-line comment.
1968 : */
1969 981059 : if (matchChar('/')) {
1970 636281 : if (cx->hasAtLineOption() && !getAtLine())
1971 0 : goto error;
1972 :
1973 636281 : if (!getAtSourceMappingURL())
1974 0 : goto error;
1975 :
1976 : skipline:
1977 : /* Optimize line skipping if we are not in an HTML comment. */
1978 636281 : if (flags & TSF_IN_HTML_COMMENT) {
1979 0 : while ((c = getChar()) != EOF && c != '\n') {
1980 0 : if (c == '-' && matchChar('-') && matchChar('>'))
1981 0 : flags &= ~TSF_IN_HTML_COMMENT;
1982 : }
1983 : } else {
1984 33172807 : while ((c = getChar()) != EOF && c != '\n')
1985 31900245 : continue;
1986 : }
1987 636281 : ungetChar(c);
1988 636281 : cursor = (cursor - 1) & ntokensMask;
1989 636281 : goto retry;
1990 : }
1991 :
1992 : /*
1993 : * Look for a multi-line comment.
1994 : */
1995 344778 : if (matchChar('*')) {
1996 302358 : unsigned linenoBefore = lineno;
1997 82276876 : while ((c = getChar()) != EOF &&
1998 2538132 : !(c == '*' && matchChar('/'))) {
1999 : /* Ignore all characters until comment close. */
2000 : }
2001 302358 : if (c == EOF) {
2002 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
2003 0 : JSMSG_UNTERMINATED_COMMENT);
2004 0 : goto error;
2005 : }
2006 302358 : if (linenoBefore != lineno)
2007 264572 : updateFlagsForEOL();
2008 302358 : cursor = (cursor - 1) & ntokensMask;
2009 302358 : goto retry;
2010 : }
2011 :
2012 : /*
2013 : * Look for a regexp.
2014 : */
2015 42420 : if (flags & TSF_OPERAND) {
2016 33985 : tokenbuf.clear();
2017 :
2018 33985 : bool inCharClass = false;
2019 396201 : for (;;) {
2020 430186 : c = getChar();
2021 430186 : if (c == '\\') {
2022 29229 : if (!tokenbuf.append(c))
2023 0 : goto error;
2024 29229 : c = getChar();
2025 400957 : } else if (c == '[') {
2026 12203 : inCharClass = true;
2027 388754 : } else if (c == ']') {
2028 12194 : inCharClass = false;
2029 376560 : } else if (c == '/' && !inCharClass) {
2030 : /* For compat with IE, allow unescaped / in char classes. */
2031 : break;
2032 : }
2033 396201 : if (c == '\n' || c == EOF) {
2034 0 : ungetChar(c);
2035 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
2036 0 : JSMSG_UNTERMINATED_REGEXP);
2037 0 : goto error;
2038 : }
2039 396201 : if (!tokenbuf.append(c))
2040 0 : goto error;
2041 : }
2042 :
2043 33985 : RegExpFlag reflags = NoFlags;
2044 33985 : unsigned length = tokenbuf.length() + 1;
2045 23498 : while (true) {
2046 57483 : c = peekChar();
2047 57483 : if (c == 'g' && !(reflags & GlobalFlag))
2048 20881 : reflags = RegExpFlag(reflags | GlobalFlag);
2049 36602 : else if (c == 'i' && !(reflags & IgnoreCaseFlag))
2050 2589 : reflags = RegExpFlag(reflags | IgnoreCaseFlag);
2051 34013 : else if (c == 'm' && !(reflags & MultilineFlag))
2052 1 : reflags = RegExpFlag(reflags | MultilineFlag);
2053 34012 : else if (c == 'y' && !(reflags & StickyFlag))
2054 27 : reflags = RegExpFlag(reflags | StickyFlag);
2055 : else
2056 : break;
2057 23498 : getChar();
2058 23498 : length++;
2059 : }
2060 :
2061 33985 : c = peekChar();
2062 67970 : if (JS7_ISLET(c)) {
2063 0 : char buf[2] = { '\0', '\0' };
2064 0 : tp->pos.begin.index += length + 1;
2065 0 : buf[0] = char(c);
2066 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_REGEXP_FLAG,
2067 0 : buf);
2068 0 : (void) getChar();
2069 0 : goto error;
2070 : }
2071 33985 : tp->setRegExpFlags(reflags);
2072 33985 : tt = TOK_REGEXP;
2073 33985 : break;
2074 : }
2075 :
2076 8435 : tp->t_op = JSOP_DIV;
2077 8435 : tt = matchChar('=') ? TOK_DIVASSIGN : TOK_DIV;
2078 8435 : break;
2079 :
2080 : case '%':
2081 3028 : tp->t_op = JSOP_MOD;
2082 3028 : tt = matchChar('=') ? TOK_MODASSIGN : TOK_MOD;
2083 3028 : break;
2084 :
2085 : case '~':
2086 285 : tp->t_op = JSOP_BITNOT;
2087 285 : tt = TOK_BITNOT;
2088 285 : break;
2089 :
2090 : case '-':
2091 134937 : if (matchChar('=')) {
2092 1507 : tp->t_op = JSOP_SUB;
2093 1507 : tt = TOK_SUBASSIGN;
2094 133430 : } else if (matchChar(c)) {
2095 7585 : if (peekChar() == '>' && !(flags & TSF_DIRTYLINE)) {
2096 0 : flags &= ~TSF_IN_HTML_COMMENT;
2097 0 : goto skipline;
2098 : }
2099 7585 : tt = TOK_DEC;
2100 : } else {
2101 125845 : tp->t_op = JSOP_NEG;
2102 125845 : tt = TOK_MINUS;
2103 : }
2104 134937 : break;
2105 :
2106 : badchar:
2107 : default:
2108 18 : ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_ILLEGAL_CHARACTER);
2109 18 : goto error;
2110 : }
2111 :
2112 : out:
2113 115067160 : flags |= TSF_DIRTYLINE;
2114 115067160 : tp->pos.end.index = userbuf.addressOfNextRawChar() - linebase;
2115 115067160 : tp->type = tt;
2116 115067160 : JS_ASSERT(IsTokenSane(tp));
2117 115067160 : return tt;
2118 :
2119 : error:
2120 : /*
2121 : * For erroneous multi-line tokens we won't have changed end.lineno (it'll
2122 : * still be equal to begin.lineno) so we revert end.index to be equal to
2123 : * begin.index + 1 (as if it's a 1-char token) to avoid having inconsistent
2124 : * begin/end positions. end.index isn't used in error messages anyway.
2125 : */
2126 19 : flags |= TSF_DIRTYLINE;
2127 19 : tp->pos.end.index = tp->pos.begin.index + 1;
2128 19 : tp->type = TOK_ERROR;
2129 19 : JS_ASSERT(IsTokenSane(tp));
2130 : #ifdef DEBUG
2131 : /*
2132 : * Poisoning userbuf on error establishes an invariant: once an erroneous
2133 : * token has been seen, userbuf will not be consulted again. This is true
2134 : * because the parser will either (a) deal with the TOK_ERROR token by
2135 : * aborting parsing immediately; or (b) if the TOK_ERROR token doesn't
2136 : * match what it expected, it will unget the token, and the next getToken()
2137 : * call will immediately return the just-gotten TOK_ERROR token again
2138 : * without consulting userbuf, thanks to the lookahead buffer.
2139 : */
2140 19 : userbuf.poison();
2141 : #endif
2142 19 : return TOK_ERROR;
2143 : }
2144 :
2145 : JS_FRIEND_API(int) /* Read at most size-1 characters of one line from file into buf, then NUL-terminate buf. */
2146 0 : js_fgets(char *buf, int size, FILE *file) /* Returns the number of characters stored before the NUL, or -1 when size < 1. */
2147 : {
2148 : int n, i, c; /* n: max characters to store; i: characters stored so far; c: most recent value read */
2149 : JSBool crflag; /* true when the previously stored character was '\r' */
2150 : /* Keep one byte of buf in reserve for the terminating '\0'. */
2151 0 : n = size - 1;
2152 0 : if (n < 0)
2153 0 : return -1; /* size <= 0: return before buf is written at all */
2154 :
2155 0 : crflag = JS_FALSE;
2156 0 : for (i = 0; i < n && (c = fast_getc(file)) != EOF; i++) { /* fast_getc is defined outside this block; presumably a getc variant -- confirm */
2157 0 : buf[i] = c;
2158 0 : if (c == '\n') { /* any \n ends a line; tested before crflag, so a \r\n pair keeps both characters */
2159 0 : i++; /* keep the \n; we know there is room for \0 */
2160 0 : break;
2161 : }
2162 0 : if (crflag) { /* \r not followed by \n ends line at the \r */
2163 0 : ungetc(c, file); /* hand c back to the stream so it is not lost to a later read */
2164 0 : break; /* and overwrite c in buf with \0 */
2165 : }
2166 0 : crflag = (c == '\r');
2167 : }
2168 : /* Here i is the slot just past the last kept character; it never exceeds n (size - 1). */
2169 0 : buf[i] = '\0';
2170 0 : return i; /* 0 when nothing was stored, e.g. EOF on the first read or size == 1 */
2171 : }
2172 :
2173 : #ifdef DEBUG /* TokenKindToString is compiled only when DEBUG is defined. */
2174 : const char * /* Returns a string literal spelling the enumerator name of tt. */
2175 0 : TokenKindToString(TokenKind tt) /* tt: the token kind to name; unmatched values yield "<bad TokenKind>". */
2176 : {
2177 0 : switch (tt) { /* one case per enumerator and no default label; presumably so compilers can flag an unhandled enumerator -- confirm */
2178 0 : case TOK_ERROR: return "TOK_ERROR";
2179 0 : case TOK_EOF: return "TOK_EOF";
2180 0 : case TOK_EOL: return "TOK_EOL";
2181 0 : case TOK_SEMI: return "TOK_SEMI";
2182 0 : case TOK_COMMA: return "TOK_COMMA";
2183 0 : case TOK_HOOK: return "TOK_HOOK";
2184 0 : case TOK_COLON: return "TOK_COLON";
2185 0 : case TOK_OR: return "TOK_OR";
2186 0 : case TOK_AND: return "TOK_AND";
2187 0 : case TOK_BITOR: return "TOK_BITOR";
2188 0 : case TOK_BITXOR: return "TOK_BITXOR";
2189 0 : case TOK_BITAND: return "TOK_BITAND";
2190 0 : case TOK_PLUS: return "TOK_PLUS";
2191 0 : case TOK_MINUS: return "TOK_MINUS";
2192 0 : case TOK_STAR: return "TOK_STAR";
2193 0 : case TOK_DIV: return "TOK_DIV";
2194 0 : case TOK_MOD: return "TOK_MOD";
2195 0 : case TOK_INC: return "TOK_INC";
2196 0 : case TOK_DEC: return "TOK_DEC";
2197 0 : case TOK_DOT: return "TOK_DOT";
2198 0 : case TOK_LB: return "TOK_LB";
2199 0 : case TOK_RB: return "TOK_RB";
2200 0 : case TOK_LC: return "TOK_LC";
2201 0 : case TOK_RC: return "TOK_RC";
2202 0 : case TOK_LP: return "TOK_LP";
2203 0 : case TOK_RP: return "TOK_RP";
2204 0 : case TOK_NAME: return "TOK_NAME";
2205 0 : case TOK_NUMBER: return "TOK_NUMBER";
2206 0 : case TOK_STRING: return "TOK_STRING";
2207 0 : case TOK_REGEXP: return "TOK_REGEXP";
2208 0 : case TOK_TRUE: return "TOK_TRUE";
2209 0 : case TOK_FALSE: return "TOK_FALSE";
2210 0 : case TOK_NULL: return "TOK_NULL";
2211 0 : case TOK_THIS: return "TOK_THIS";
2212 0 : case TOK_FUNCTION: return "TOK_FUNCTION";
2213 0 : case TOK_IF: return "TOK_IF";
2214 0 : case TOK_ELSE: return "TOK_ELSE";
2215 0 : case TOK_SWITCH: return "TOK_SWITCH";
2216 0 : case TOK_CASE: return "TOK_CASE";
2217 0 : case TOK_DEFAULT: return "TOK_DEFAULT";
2218 0 : case TOK_WHILE: return "TOK_WHILE";
2219 0 : case TOK_DO: return "TOK_DO";
2220 0 : case TOK_FOR: return "TOK_FOR";
2221 0 : case TOK_BREAK: return "TOK_BREAK";
2222 0 : case TOK_CONTINUE: return "TOK_CONTINUE";
2223 0 : case TOK_IN: return "TOK_IN";
2224 0 : case TOK_VAR: return "TOK_VAR";
2225 0 : case TOK_CONST: return "TOK_CONST";
2226 0 : case TOK_WITH: return "TOK_WITH";
2227 0 : case TOK_RETURN: return "TOK_RETURN";
2228 0 : case TOK_NEW: return "TOK_NEW";
2229 0 : case TOK_DELETE: return "TOK_DELETE";
2230 0 : case TOK_TRY: return "TOK_TRY";
2231 0 : case TOK_CATCH: return "TOK_CATCH";
2232 0 : case TOK_FINALLY: return "TOK_FINALLY";
2233 0 : case TOK_THROW: return "TOK_THROW";
2234 0 : case TOK_INSTANCEOF: return "TOK_INSTANCEOF";
2235 0 : case TOK_DEBUGGER: return "TOK_DEBUGGER";
2236 0 : case TOK_XMLSTAGO: return "TOK_XMLSTAGO";
2237 0 : case TOK_XMLETAGO: return "TOK_XMLETAGO";
2238 0 : case TOK_XMLPTAGC: return "TOK_XMLPTAGC";
2239 0 : case TOK_XMLTAGC: return "TOK_XMLTAGC";
2240 0 : case TOK_XMLNAME: return "TOK_XMLNAME";
2241 0 : case TOK_XMLATTR: return "TOK_XMLATTR";
2242 0 : case TOK_XMLSPACE: return "TOK_XMLSPACE";
2243 0 : case TOK_XMLTEXT: return "TOK_XMLTEXT";
2244 0 : case TOK_XMLCOMMENT: return "TOK_XMLCOMMENT";
2245 0 : case TOK_XMLCDATA: return "TOK_XMLCDATA";
2246 0 : case TOK_XMLPI: return "TOK_XMLPI";
2247 0 : case TOK_AT: return "TOK_AT";
2248 0 : case TOK_DBLCOLON: return "TOK_DBLCOLON";
2249 0 : case TOK_DBLDOT: return "TOK_DBLDOT";
2250 0 : case TOK_FILTER: return "TOK_FILTER";
2251 0 : case TOK_XMLELEM: return "TOK_XMLELEM";
2252 0 : case TOK_XMLLIST: return "TOK_XMLLIST";
2253 0 : case TOK_YIELD: return "TOK_YIELD";
2254 0 : case TOK_LEXICALSCOPE: return "TOK_LEXICALSCOPE";
2255 0 : case TOK_LET: return "TOK_LET";
2256 0 : case TOK_RESERVED: return "TOK_RESERVED";
2257 0 : case TOK_STRICT_RESERVED: return "TOK_STRICT_RESERVED";
2258 0 : case TOK_STRICTEQ: return "TOK_STRICTEQ";
2259 0 : case TOK_EQ: return "TOK_EQ";
2260 0 : case TOK_STRICTNE: return "TOK_STRICTNE";
2261 0 : case TOK_NE: return "TOK_NE";
2262 0 : case TOK_TYPEOF: return "TOK_TYPEOF";
2263 0 : case TOK_VOID: return "TOK_VOID";
2264 0 : case TOK_NOT: return "TOK_NOT";
2265 0 : case TOK_BITNOT: return "TOK_BITNOT";
2266 0 : case TOK_LT: return "TOK_LT";
2267 0 : case TOK_LE: return "TOK_LE";
2268 0 : case TOK_GT: return "TOK_GT";
2269 0 : case TOK_GE: return "TOK_GE";
2270 0 : case TOK_LSH: return "TOK_LSH";
2271 0 : case TOK_RSH: return "TOK_RSH";
2272 0 : case TOK_URSH: return "TOK_URSH";
2273 0 : case TOK_ASSIGN: return "TOK_ASSIGN";
2274 0 : case TOK_ADDASSIGN: return "TOK_ADDASSIGN";
2275 0 : case TOK_SUBASSIGN: return "TOK_SUBASSIGN";
2276 0 : case TOK_BITORASSIGN: return "TOK_BITORASSIGN";
2277 0 : case TOK_BITXORASSIGN: return "TOK_BITXORASSIGN";
2278 0 : case TOK_BITANDASSIGN: return "TOK_BITANDASSIGN";
2279 0 : case TOK_LSHASSIGN: return "TOK_LSHASSIGN";
2280 0 : case TOK_RSHASSIGN: return "TOK_RSHASSIGN";
2281 0 : case TOK_URSHASSIGN: return "TOK_URSHASSIGN";
2282 0 : case TOK_MULASSIGN: return "TOK_MULASSIGN";
2283 0 : case TOK_DIVASSIGN: return "TOK_DIVASSIGN";
2284 0 : case TOK_MODASSIGN: return "TOK_MODASSIGN";
2285 0 : case TOK_LIMIT: break; /* TOK_LIMIT gets no name of its own (presumably an end-of-enum marker -- confirm); it leaves the switch for the fallback below */
2286 : }
2287 : /* Reached for TOK_LIMIT and for any value that matched no case above. */
2288 0 : return "<bad TokenKind>";
2289 : }
2290 : #endif /* DEBUG */
|