XPathParser.cpp source code [webkit/Source/WebCore/xml/XPathParser.cpp]

1	/*
2	* Copyright 2005 Maksim Orlovich <maksim@kde.org>
3	* Copyright (C) 2006, 2013 Apple Inc. All rights reserved.
4	* Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org>
5	*
6	* Redistribution and use in source and binary forms, with or without
7	* modification, are permitted provided that the following conditions
8	* are met:
9	*
10	* 1. Redistributions of source code must retain the above copyright
11	* notice, this list of conditions and the following disclaimer.
12	* 2. Redistributions in binary form must reproduce the above copyright
13	* notice, this list of conditions and the following disclaimer in the
14	* documentation and/or other materials provided with the distribution.
15	*
16	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26	*/
27
28	#include "config.h"
29	#include "XPathParser.h"
30
31	#include "XPathEvaluator.h"
32	#include "XPathNSResolver.h"
33	#include "XPathPath.h"
34	#include "XPathStep.h"
35	#include <wtf/NeverDestroyed.h>
36	#include <wtf/StdLibExtras.h>
37	#include <wtf/text/StringHash.h>
38
39	using namespace WebCore::XPath;
40
41	extern int xpathyyparse(WebCore::XPath::Parser&);
42
43	#include "XPathGrammar.h"
44
45	namespace WebCore {
46	namespace XPath {
47
48	struct Parser::Token {
49	int type;
50	String string;
51	Step::Axis axis;
52	NumericOp::Opcode numericOpcode;
53	EqTestOp::Opcode equalityTestOpcode;
54
55	Token(int type) : type(type) { }
56	Token(int type, const String& string) : type(type), string (string) { }
57	Token(int type, Step::Axis axis) : type(type), axis(axis) { }
58	Token(int type, NumericOp::Opcode opcode) : type(type), numericOpcode(opcode) { }
59	Token(int type, EqTestOp::Opcode opcode) : type(type), equalityTestOpcode(opcode) { }
60	};
61
62	enum XMLCat { NameStart, NameCont, NotPartOfName };
63
64	static XMLCat charCat(UChar character)
65	{
66	if (character == `'_'`)
67	return NameStart;
68
69	if (character == `'.'` \|\| character == `'-'`)
70	return NameCont;
71	unsigned characterTypeMask = U_GET_GC_MASK(character);
72	if (characterTypeMask & (U_GC_LU_MASK \| U_GC_LL_MASK \| U_GC_LO_MASK \| U_GC_LT_MASK \| U_GC_NL_MASK))
73	return NameStart;
74	if (characterTypeMask & (U_GC_M_MASK \| U_GC_LM_MASK \| U_GC_ND_MASK))
75	return NameCont;
76	return NotPartOfName;
77	}
78
79	static HashMap<String, Step::Axis> createAxisNamesMap()
80	{
81	struct AxisName {
82	const char* name;
83	Step::Axis axis;
84	};
85	const AxisName axisNameList[] = {
86	{ "ancestor", Step::AncestorAxis },
87	{ "ancestor-or-self", Step::AncestorOrSelfAxis },
88	{ "attribute", Step::AttributeAxis },
89	{ "child", Step::ChildAxis },
90	{ "descendant", Step::DescendantAxis },
91	{ "descendant-or-self", Step::DescendantOrSelfAxis },
92	{ "following", Step::FollowingAxis },
93	{ "following-sibling", Step::FollowingSiblingAxis },
94	{ "namespace", Step::NamespaceAxis },
95	{ "parent", Step::ParentAxis },
96	{ "preceding", Step::PrecedingAxis },
97	{ "preceding-sibling", Step::PrecedingSiblingAxis },
98	{ "self", Step::SelfAxis }
99	};
100	HashMap<String, Step::Axis> map;
101	for (auto& axisName : axisNameList)
102	map.add(axisName.name, axisName.axis);
103	return map;
104	}
105
106	static bool parseAxisName(const String& name, Step::Axis& type)
107	{
108	static const auto axisNames = makeNeverDestroyed(createAxisNamesMap());
109	auto it = axisNames.get().find(name);
110	if (it == axisNames.get().end())
111	return false;
112	type = it ->value;
113	return true;
114	}
115
116	// Returns whether the current token can possibly be a binary operator, given
117	// the previous token. Necessary to disambiguate some of the operators
118	// ( (multiply), div, and, or, mod) in the [32] Operator rule*
119	// (check http://www.w3.org/TR/xpath#exprlex).
120	bool Parser::isBinaryOperatorContext() const
121	{
122	switch (m_lastTokenType) {
123	case `0`:
124	case `'@'`: case AXISNAME: case `'('`: case `'['`: case `','`:
125	case AND: case OR: case MULOP:
126	case `'/'`: case SLASHSLASH: case `'\|'`: case PLUS: case MINUS:
127	case EQOP: case RELOP:
128	return false;
129	default:
130	return true;
131	}
132	}
133
134	void Parser::skipWS()
135	{
136	while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data [m_nextPos]))
137	++m_nextPos;
138	}
139
140	Parser::Token Parser::makeTokenAndAdvance(int code, int advance)
141	{
142	m_nextPos += advance;
143	return Token (code);
144	}
145
146	Parser::Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance)
147	{
148	m_nextPos += advance;
149	return Token (code, val);
150	}
151
152	Parser::Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance)
153	{
154	m_nextPos += advance;
155	return Token (code, val);
156	}
157
158	// Returns next char if it's there and interesting, 0 otherwise.
159	char Parser::peekAheadHelper()
160	{
161	if (m_nextPos + `1` >= m_data.length())
162	return `0`;
163	UChar next = m_data [m_nextPos + `1`];
164	if (next >= `0xff`)
165	return `0`;
166	return next;
167	}
168
169	char Parser::peekCurHelper()
170	{
171	if (m_nextPos >= m_data.length())
172	return `0`;
173	UChar next = m_data [m_nextPos];
174	if (next >= `0xff`)
175	return `0`;
176	return next;
177	}
178
179	Parser::Token Parser::lexString()
180	{
181	UChar delimiter = m_data [m_nextPos];
182	int startPos = m_nextPos + `1`;
183
184	for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) {
185	if (m_data [m_nextPos] == delimiter) {
186	String value = m_data.substring(startPos, m_nextPos - startPos);
187	if (value.isNull())
188	value = emptyString();
189	++m_nextPos; // Consume the char.
190	return Token (LITERAL, value);
191	}
192	}
193
194	// Ouch, went off the end -- report error.
195	return Token (XPATH_ERROR);
196	}
197
198	Parser::Token Parser::lexNumber()
199	{
200	int startPos = m_nextPos;
201	bool seenDot = false;
202
203	// Go until end or a non-digits character.
204	for (; m_nextPos < m_data.length(); ++m_nextPos) {
205	UChar aChar = m_data [m_nextPos];
206	if (aChar >= `0xff`) break;
207
208	if (!isASCIIDigit(aChar)) {
209	if (aChar == `'.'` && !seenDot)
210	seenDot = true;
211	else
212	break;
213	}
214	}
215
216	return Token (NUMBER, m_data.substring(startPos, m_nextPos - startPos));
217	}
218
219	bool Parser::lexNCName(String& name)
220	{
221	int startPos = m_nextPos;
222	if (m_nextPos >= m_data.length())
223	return false;
224
225	if (charCat(m_data [m_nextPos]) != NameStart)
226	return false;
227
228	// Keep going until we get a character that's not good for names.
229	for (; m_nextPos < m_data.length(); ++m_nextPos)
230	if (charCat(m_data [m_nextPos]) == NotPartOfName)
231	break;
232
233	name = m_data.substring(startPos, m_nextPos - startPos);
234	return true;
235	}
236
237	bool Parser::lexQName(String& name)
238	{
239	String n1;
240	if (!lexNCName(n1))
241	return false;
242
243	skipWS();
244
245	// If the next character is :, what we just got it the prefix, if not,
246	// it's the whole thing.
247	if (peekAheadHelper() != `':'`) {
248	name = n1;
249	return true;
250	}
251
252	String n2;
253	if (!lexNCName(n2))
254	return false;
255
256	name = n1 + ":" + n2;
257	return true;
258	}
259
260	inline Parser::Token Parser::nextTokenInternal()
261	{
262	skipWS();
263
264	if (m_nextPos >= m_data.length())
265	return Token (`0`);
266
267	char code = peekCurHelper();
268	switch (code) {
269	case `'('`: case `')'`: case `'['`: case `']'`:
270	case `'@'`: case `','`: case `'\|'`:
271	return makeTokenAndAdvance(code);
272	case `'\''`:
273	case `'\"'`:
274	return lexString();
275	case `'0'`: case `'1'`: case `'2'`: case `'3'`: case `'4'`:
276	case `'5'`: case `'6'`: case `'7'`: case `'8'`: case `'9'`:
277	return lexNumber();
278	case `'.'`: {
279	char next = peekAheadHelper();
280	if (next == `'.'`)
281	return makeTokenAndAdvance(DOTDOT, `2`);
282	if (isASCIIDigit(next))
283	return lexNumber();
284	return makeTokenAndAdvance(`'.'`);
285	}
286	case `'/'`:
287	if (peekAheadHelper() == `'/'`)
288	return makeTokenAndAdvance(SLASHSLASH, `2`);
289	return makeTokenAndAdvance(`'/'`);
290	case `'+'`:
291	return makeTokenAndAdvance(PLUS);
292	case `'-'`:
293	return makeTokenAndAdvance(MINUS);
294	case `'='`:
295	return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);
296	case `'!'`:
297	if (peekAheadHelper() == `'='`)
298	return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, `2`);
299	return Token (XPATH_ERROR);
300	case `'<'`:
301	if (peekAheadHelper() == `'='`)
302	return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, `2`);
303	return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
304	case `'>'`:
305	if (peekAheadHelper() == `'='`)
306	return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, `2`);
307	return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
308	case `'*'`:
309	if (isBinaryOperatorContext())
310	return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);
311	++m_nextPos;
312	return Token (NAMETEST, "*");
313	case `'$'`: { // $ QName
314	m_nextPos++;
315	String name;
316	if (!lexQName(name))
317	return Token (XPATH_ERROR);
318	return Token (VARIABLEREFERENCE, name);
319	}
320	}
321
322	String name;
323	if (!lexNCName(name))
324	return Token (XPATH_ERROR);
325
326	skipWS();
327	// If we're in an operator context, check for any operator names
328	if (isBinaryOperatorContext()) {
329	if (name == "and") //### hash?
330	return Token (AND);
331	if (name == "or")
332	return Token (OR);
333	if (name == "mod")
334	return Token (MULOP, NumericOp::OP_Mod);
335	if (name == "div")
336	return Token (MULOP, NumericOp::OP_Div);
337	}
338
339	// See whether we are at a :
340	if (peekCurHelper() == `':'`) {
341	m_nextPos++;
342	// Any chance it's an axis name?
343	if (peekCurHelper() == `':'`) {
344	m_nextPos++;
345
346	//It might be an axis name.
347	Step::Axis axis;
348	if (parseAxisName(name, axis))
349	return Token (AXISNAME, axis);
350	// Ugh, :: is only valid in axis names -> error
351	return Token (XPATH_ERROR);
352	}
353
354	// Seems like this is a fully qualified qname, or perhaps the modified one from NameTest*
355	skipWS();
356	if (peekCurHelper() == `'*'`) {
357	m_nextPos++;
358	return Token (NAMETEST, name + ":*");
359	}
360
361	// Make a full qname.
362	String n2;
363	if (!lexNCName(n2))
364	return Token (XPATH_ERROR);
365
366	name = name + ":" + n2;
367	}
368
369	skipWS();
370
371	if (peekCurHelper() == `'('`) {
372	// note: we don't swallow the '(' here!
373
374	// Either node type oor function name.
375
376	if (name == "processing-instruction")
377	return Token (PI);
378	if (name == "node")
379	return Token (NODE);
380	if (name == "text")
381	return Token (TEXT_);
382	if (name == "comment")
383	return Token (COMMENT);
384
385	return Token (FUNCTIONNAME, name);
386	}
387
388	// At this point, it must be NAMETEST.
389	return Token (NAMETEST, name);
390	}
391
392	inline Parser::Token Parser::nextToken()
393	{
394	Token token = nextTokenInternal();
395	m_lastTokenType = token.type;
396	return token;
397	}
398
399	Parser::Parser(const String& statement, RefPtr<XPathNSResolver>&& resolver)
400	: m_data(statement)
401	, m_resolver (WTFMove(resolver))
402	{
403	}
404
405	int Parser::lex(YYSTYPE& yylval)
406	{
407	Token token = nextToken();
408
409	switch (token.type) {
410	case AXISNAME:
411	yylval.axis = token.axis;
412	break;
413	case MULOP:
414	yylval.numericOpcode = token.numericOpcode;
415	break;
416	case RELOP:
417	case EQOP:
418	yylval.equalityTestOpcode = token.equalityTestOpcode;
419	break;
420	case NODETYPE:
421	case FUNCTIONNAME:
422	case LITERAL:
423	case VARIABLEREFERENCE:
424	case NUMBER:
425	case NAMETEST:
426	yylval.string = token.string.releaseImpl().leakRef();
427	break;
428	}
429
430	return token.type;
431	}
432
433	bool Parser::expandQualifiedName(const String& qualifiedName, String& localName, String& namespaceURI)
434	{
435	size_t colon = qualifiedName.find(`':'`);
436	if (colon != notFound) {
437	if (!m_resolver) {
438	m_sawNamespaceError = true;
439	return false;
440	}
441	namespaceURI = m_resolver ->lookupNamespaceURI(qualifiedName.left(colon));
442	if (namespaceURI.isNull()) {
443	m_sawNamespaceError = true;
444	return false;
445	}
446	localName = qualifiedName.substring(colon + `1`);
447	} else
448	localName = qualifiedName;
449	return true;
450	}
451
452	ExceptionOr<std::unique_ptr<Expression>> Parser::parseStatement(const String& statement, RefPtr<XPathNSResolver>&& resolver)
453	{
454	Parser parser { statement, WTFMove(resolver) };
455
456	int parseError = xpathyyparse(parser);
457
458	if (parser.m_sawNamespaceError)
459	return Exception { NamespaceError };
460
461	if (parseError)
462	return Exception { SyntaxError };
463
464	return WTFMove(parser.m_result);
465	}
466
467	} }
468

Browse the source code of webkit/Source/WebCore/xml/XPathParser.cpp