1/*
2 * Copyright 2005 Maksim Orlovich <maksim@kde.org>
3 * Copyright (C) 2006, 2013 Apple Inc. All rights reserved.
4 * Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "config.h"
29#include "XPathParser.h"
30
31#include "XPathEvaluator.h"
32#include "XPathNSResolver.h"
33#include "XPathPath.h"
34#include "XPathStep.h"
35#include <wtf/NeverDestroyed.h>
36#include <wtf/StdLibExtras.h>
37#include <wtf/text/StringHash.h>
38
39using namespace WebCore::XPath;
40
41extern int xpathyyparse(WebCore::XPath::Parser&);
42
43#include "XPathGrammar.h"
44
45namespace WebCore {
46namespace XPath {
47
48struct Parser::Token {
49 int type;
50 String string;
51 Step::Axis axis;
52 NumericOp::Opcode numericOpcode;
53 EqTestOp::Opcode equalityTestOpcode;
54
55 Token(int type) : type(type) { }
56 Token(int type, const String& string) : type(type), string(string) { }
57 Token(int type, Step::Axis axis) : type(type), axis(axis) { }
58 Token(int type, NumericOp::Opcode opcode) : type(type), numericOpcode(opcode) { }
59 Token(int type, EqTestOp::Opcode opcode) : type(type), equalityTestOpcode(opcode) { }
60};
61
62enum XMLCat { NameStart, NameCont, NotPartOfName };
63
64static XMLCat charCat(UChar character)
65{
66 if (character == '_')
67 return NameStart;
68
69 if (character == '.' || character == '-')
70 return NameCont;
71 unsigned characterTypeMask = U_GET_GC_MASK(character);
72 if (characterTypeMask & (U_GC_LU_MASK | U_GC_LL_MASK | U_GC_LO_MASK | U_GC_LT_MASK | U_GC_NL_MASK))
73 return NameStart;
74 if (characterTypeMask & (U_GC_M_MASK | U_GC_LM_MASK | U_GC_ND_MASK))
75 return NameCont;
76 return NotPartOfName;
77}
78
79static HashMap<String, Step::Axis> createAxisNamesMap()
80{
81 struct AxisName {
82 const char* name;
83 Step::Axis axis;
84 };
85 const AxisName axisNameList[] = {
86 { "ancestor", Step::AncestorAxis },
87 { "ancestor-or-self", Step::AncestorOrSelfAxis },
88 { "attribute", Step::AttributeAxis },
89 { "child", Step::ChildAxis },
90 { "descendant", Step::DescendantAxis },
91 { "descendant-or-self", Step::DescendantOrSelfAxis },
92 { "following", Step::FollowingAxis },
93 { "following-sibling", Step::FollowingSiblingAxis },
94 { "namespace", Step::NamespaceAxis },
95 { "parent", Step::ParentAxis },
96 { "preceding", Step::PrecedingAxis },
97 { "preceding-sibling", Step::PrecedingSiblingAxis },
98 { "self", Step::SelfAxis }
99 };
100 HashMap<String, Step::Axis> map;
101 for (auto& axisName : axisNameList)
102 map.add(axisName.name, axisName.axis);
103 return map;
104}
105
106static bool parseAxisName(const String& name, Step::Axis& type)
107{
108 static const auto axisNames = makeNeverDestroyed(createAxisNamesMap());
109 auto it = axisNames.get().find(name);
110 if (it == axisNames.get().end())
111 return false;
112 type = it->value;
113 return true;
114}
115
116// Returns whether the current token can possibly be a binary operator, given
117// the previous token. Necessary to disambiguate some of the operators
118// (* (multiply), div, and, or, mod) in the [32] Operator rule
119// (check http://www.w3.org/TR/xpath#exprlex).
120bool Parser::isBinaryOperatorContext() const
121{
122 switch (m_lastTokenType) {
123 case 0:
124 case '@': case AXISNAME: case '(': case '[': case ',':
125 case AND: case OR: case MULOP:
126 case '/': case SLASHSLASH: case '|': case PLUS: case MINUS:
127 case EQOP: case RELOP:
128 return false;
129 default:
130 return true;
131 }
132}
133
134void Parser::skipWS()
135{
136 while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos]))
137 ++m_nextPos;
138}
139
140Parser::Token Parser::makeTokenAndAdvance(int code, int advance)
141{
142 m_nextPos += advance;
143 return Token(code);
144}
145
146Parser::Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance)
147{
148 m_nextPos += advance;
149 return Token(code, val);
150}
151
152Parser::Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance)
153{
154 m_nextPos += advance;
155 return Token(code, val);
156}
157
158// Returns next char if it's there and interesting, 0 otherwise.
159char Parser::peekAheadHelper()
160{
161 if (m_nextPos + 1 >= m_data.length())
162 return 0;
163 UChar next = m_data[m_nextPos + 1];
164 if (next >= 0xff)
165 return 0;
166 return next;
167}
168
169char Parser::peekCurHelper()
170{
171 if (m_nextPos >= m_data.length())
172 return 0;
173 UChar next = m_data[m_nextPos];
174 if (next >= 0xff)
175 return 0;
176 return next;
177}
178
179Parser::Token Parser::lexString()
180{
181 UChar delimiter = m_data[m_nextPos];
182 int startPos = m_nextPos + 1;
183
184 for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) {
185 if (m_data[m_nextPos] == delimiter) {
186 String value = m_data.substring(startPos, m_nextPos - startPos);
187 if (value.isNull())
188 value = emptyString();
189 ++m_nextPos; // Consume the char.
190 return Token(LITERAL, value);
191 }
192 }
193
194 // Ouch, went off the end -- report error.
195 return Token(XPATH_ERROR);
196}
197
198Parser::Token Parser::lexNumber()
199{
200 int startPos = m_nextPos;
201 bool seenDot = false;
202
203 // Go until end or a non-digits character.
204 for (; m_nextPos < m_data.length(); ++m_nextPos) {
205 UChar aChar = m_data[m_nextPos];
206 if (aChar >= 0xff) break;
207
208 if (!isASCIIDigit(aChar)) {
209 if (aChar == '.' && !seenDot)
210 seenDot = true;
211 else
212 break;
213 }
214 }
215
216 return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos));
217}
218
219bool Parser::lexNCName(String& name)
220{
221 int startPos = m_nextPos;
222 if (m_nextPos >= m_data.length())
223 return false;
224
225 if (charCat(m_data[m_nextPos]) != NameStart)
226 return false;
227
228 // Keep going until we get a character that's not good for names.
229 for (; m_nextPos < m_data.length(); ++m_nextPos)
230 if (charCat(m_data[m_nextPos]) == NotPartOfName)
231 break;
232
233 name = m_data.substring(startPos, m_nextPos - startPos);
234 return true;
235}
236
237bool Parser::lexQName(String& name)
238{
239 String n1;
240 if (!lexNCName(n1))
241 return false;
242
243 skipWS();
244
245 // If the next character is :, what we just got it the prefix, if not,
246 // it's the whole thing.
247 if (peekAheadHelper() != ':') {
248 name = n1;
249 return true;
250 }
251
252 String n2;
253 if (!lexNCName(n2))
254 return false;
255
256 name = n1 + ":" + n2;
257 return true;
258}
259
260inline Parser::Token Parser::nextTokenInternal()
261{
262 skipWS();
263
264 if (m_nextPos >= m_data.length())
265 return Token(0);
266
267 char code = peekCurHelper();
268 switch (code) {
269 case '(': case ')': case '[': case ']':
270 case '@': case ',': case '|':
271 return makeTokenAndAdvance(code);
272 case '\'':
273 case '\"':
274 return lexString();
275 case '0': case '1': case '2': case '3': case '4':
276 case '5': case '6': case '7': case '8': case '9':
277 return lexNumber();
278 case '.': {
279 char next = peekAheadHelper();
280 if (next == '.')
281 return makeTokenAndAdvance(DOTDOT, 2);
282 if (isASCIIDigit(next))
283 return lexNumber();
284 return makeTokenAndAdvance('.');
285 }
286 case '/':
287 if (peekAheadHelper() == '/')
288 return makeTokenAndAdvance(SLASHSLASH, 2);
289 return makeTokenAndAdvance('/');
290 case '+':
291 return makeTokenAndAdvance(PLUS);
292 case '-':
293 return makeTokenAndAdvance(MINUS);
294 case '=':
295 return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);
296 case '!':
297 if (peekAheadHelper() == '=')
298 return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);
299 return Token(XPATH_ERROR);
300 case '<':
301 if (peekAheadHelper() == '=')
302 return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2);
303 return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
304 case '>':
305 if (peekAheadHelper() == '=')
306 return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2);
307 return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
308 case '*':
309 if (isBinaryOperatorContext())
310 return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);
311 ++m_nextPos;
312 return Token(NAMETEST, "*");
313 case '$': { // $ QName
314 m_nextPos++;
315 String name;
316 if (!lexQName(name))
317 return Token(XPATH_ERROR);
318 return Token(VARIABLEREFERENCE, name);
319 }
320 }
321
322 String name;
323 if (!lexNCName(name))
324 return Token(XPATH_ERROR);
325
326 skipWS();
327 // If we're in an operator context, check for any operator names
328 if (isBinaryOperatorContext()) {
329 if (name == "and") //### hash?
330 return Token(AND);
331 if (name == "or")
332 return Token(OR);
333 if (name == "mod")
334 return Token(MULOP, NumericOp::OP_Mod);
335 if (name == "div")
336 return Token(MULOP, NumericOp::OP_Div);
337 }
338
339 // See whether we are at a :
340 if (peekCurHelper() == ':') {
341 m_nextPos++;
342 // Any chance it's an axis name?
343 if (peekCurHelper() == ':') {
344 m_nextPos++;
345
346 //It might be an axis name.
347 Step::Axis axis;
348 if (parseAxisName(name, axis))
349 return Token(AXISNAME, axis);
350 // Ugh, :: is only valid in axis names -> error
351 return Token(XPATH_ERROR);
352 }
353
354 // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest
355 skipWS();
356 if (peekCurHelper() == '*') {
357 m_nextPos++;
358 return Token(NAMETEST, name + ":*");
359 }
360
361 // Make a full qname.
362 String n2;
363 if (!lexNCName(n2))
364 return Token(XPATH_ERROR);
365
366 name = name + ":" + n2;
367 }
368
369 skipWS();
370
371 if (peekCurHelper() == '(') {
372 // note: we don't swallow the '(' here!
373
374 // Either node type oor function name.
375
376 if (name == "processing-instruction")
377 return Token(PI);
378 if (name == "node")
379 return Token(NODE);
380 if (name == "text")
381 return Token(TEXT_);
382 if (name == "comment")
383 return Token(COMMENT);
384
385 return Token(FUNCTIONNAME, name);
386 }
387
388 // At this point, it must be NAMETEST.
389 return Token(NAMETEST, name);
390}
391
392inline Parser::Token Parser::nextToken()
393{
394 Token token = nextTokenInternal();
395 m_lastTokenType = token.type;
396 return token;
397}
398
399Parser::Parser(const String& statement, RefPtr<XPathNSResolver>&& resolver)
400 : m_data(statement)
401 , m_resolver(WTFMove(resolver))
402{
403}
404
405int Parser::lex(YYSTYPE& yylval)
406{
407 Token token = nextToken();
408
409 switch (token.type) {
410 case AXISNAME:
411 yylval.axis = token.axis;
412 break;
413 case MULOP:
414 yylval.numericOpcode = token.numericOpcode;
415 break;
416 case RELOP:
417 case EQOP:
418 yylval.equalityTestOpcode = token.equalityTestOpcode;
419 break;
420 case NODETYPE:
421 case FUNCTIONNAME:
422 case LITERAL:
423 case VARIABLEREFERENCE:
424 case NUMBER:
425 case NAMETEST:
426 yylval.string = token.string.releaseImpl().leakRef();
427 break;
428 }
429
430 return token.type;
431}
432
433bool Parser::expandQualifiedName(const String& qualifiedName, String& localName, String& namespaceURI)
434{
435 size_t colon = qualifiedName.find(':');
436 if (colon != notFound) {
437 if (!m_resolver) {
438 m_sawNamespaceError = true;
439 return false;
440 }
441 namespaceURI = m_resolver->lookupNamespaceURI(qualifiedName.left(colon));
442 if (namespaceURI.isNull()) {
443 m_sawNamespaceError = true;
444 return false;
445 }
446 localName = qualifiedName.substring(colon + 1);
447 } else
448 localName = qualifiedName;
449 return true;
450}
451
452ExceptionOr<std::unique_ptr<Expression>> Parser::parseStatement(const String& statement, RefPtr<XPathNSResolver>&& resolver)
453{
454 Parser parser { statement, WTFMove(resolver) };
455
456 int parseError = xpathyyparse(parser);
457
458 if (parser.m_sawNamespaceError)
459 return Exception { NamespaceError };
460
461 if (parseError)
462 return Exception { SyntaxError };
463
464 return WTFMove(parser.m_result);
465}
466
467} }
468