1 | /* |
2 | * This file is part of the XSL implementation. |
3 | * |
4 | * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple, Inc. All rights reserved. |
5 | * Copyright (C) 2005, 2006 Alexey Proskuryakov <ap@webkit.org> |
6 | * |
7 | * This library is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU Library General Public |
9 | * License as published by the Free Software Foundation; either |
10 | * version 2 of the License, or (at your option) any later version. |
11 | * |
12 | * This library is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | * Library General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU Library General Public License |
18 | * along with this library; see the file COPYING.LIB. If not, write to |
19 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
20 | * Boston, MA 02110-1301, USA. |
21 | */ |
22 | |
23 | #include "config.h" |
24 | |
25 | #if ENABLE(XSLT) |
26 | |
27 | #include "XSLTProcessor.h" |
28 | |
29 | #include "CachedResourceLoader.h" |
30 | #include "Document.h" |
31 | #include "Frame.h" |
32 | #include "FrameLoader.h" |
33 | #include "Page.h" |
34 | #include "PageConsoleClient.h" |
35 | #include "ResourceError.h" |
36 | #include "ResourceRequest.h" |
37 | #include "ResourceResponse.h" |
38 | #include "SecurityOrigin.h" |
39 | #include "SharedBuffer.h" |
40 | #include "TransformSource.h" |
41 | #include "XMLDocumentParser.h" |
42 | #include "XSLTExtensions.h" |
43 | #include "XSLTUnicodeSort.h" |
44 | #include "markup.h" |
45 | #include <libxslt/imports.h> |
46 | #include <libxslt/security.h> |
47 | #include <libxslt/variables.h> |
48 | #include <libxslt/xslt.h> |
49 | #include <libxslt/xsltutils.h> |
50 | #include <wtf/Assertions.h> |
51 | |
52 | #if OS(DARWIN) && !PLATFORM(GTK) |
53 | #include "SoftLinkLibxslt.h" |
54 | #endif |
55 | |
56 | namespace WebCore { |
57 | |
// Variadic (printf-style) error callback; this file installs it with xmlSetGenericErrorFunc().
// All arguments are ignored, so anything reported through this path is dropped.
void XSLTProcessor::genericErrorFunc(void*, const char*, ...)
{
    // It would be nice to do something with this error message.
}
62 | |
63 | void XSLTProcessor::parseErrorFunc(void* userData, xmlError* error) |
64 | { |
65 | PageConsoleClient* console = static_cast<PageConsoleClient*>(userData); |
66 | if (!console) |
67 | return; |
68 | |
69 | MessageLevel level; |
70 | switch (error->level) { |
71 | case XML_ERR_NONE: |
72 | level = MessageLevel::Debug; |
73 | break; |
74 | case XML_ERR_WARNING: |
75 | level = MessageLevel::Warning; |
76 | break; |
77 | case XML_ERR_ERROR: |
78 | case XML_ERR_FATAL: |
79 | default: |
80 | level = MessageLevel::Error; |
81 | break; |
82 | } |
83 | |
84 | // xmlError->int2 is the column number of the error or 0 if N/A. |
85 | console->addMessage(MessageSource::XML, level, error->message, error->file, error->line, error->int2); |
86 | } |
87 | |
// FIXME: There seems to be no way to control the ctxt pointer for loading here, thus we have globals.
// Both pointers are assigned by setXSLTLoadCallBack() and read by docLoaderFunc(); while
// globalProcessor is null, docLoaderFunc() returns without loading anything.
static XSLTProcessor* globalProcessor = nullptr;
static CachedResourceLoader* globalCachedResourceLoader = nullptr;
91 | static xmlDocPtr docLoaderFunc(const xmlChar* uri, |
92 | xmlDictPtr, |
93 | int options, |
94 | void* ctxt, |
95 | xsltLoadType type) |
96 | { |
97 | if (!globalProcessor) |
98 | return 0; |
99 | |
100 | switch (type) { |
101 | case XSLT_LOAD_DOCUMENT: { |
102 | xsltTransformContextPtr context = (xsltTransformContextPtr)ctxt; |
103 | xmlChar* base = xmlNodeGetBase(context->document->doc, context->node); |
104 | URL url(URL({ }, reinterpret_cast<const char*>(base)), reinterpret_cast<const char*>(uri)); |
105 | xmlFree(base); |
106 | ResourceError error; |
107 | ResourceResponse response; |
108 | |
109 | RefPtr<SharedBuffer> data; |
110 | |
111 | bool requestAllowed = globalCachedResourceLoader->frame() && globalCachedResourceLoader->document()->securityOrigin().canRequest(url); |
112 | if (requestAllowed) { |
113 | FetchOptions options; |
114 | options.mode = FetchOptions::Mode::SameOrigin; |
115 | options.credentials = FetchOptions::Credentials::Include; |
116 | globalCachedResourceLoader->frame()->loader().loadResourceSynchronously(url, ClientCredentialPolicy::MayAskClientForCredentials, options, { }, error, response, data); |
117 | if (error.isNull()) |
118 | requestAllowed = globalCachedResourceLoader->document()->securityOrigin().canRequest(response.url()); |
119 | else if (data) |
120 | data = nullptr; |
121 | } |
122 | if (!requestAllowed) { |
123 | if (data) |
124 | data = nullptr; |
125 | globalCachedResourceLoader->printAccessDeniedMessage(url); |
126 | } |
127 | |
128 | PageConsoleClient* console = nullptr; |
129 | Frame* frame = globalProcessor->xslStylesheet()->ownerDocument()->frame(); |
130 | if (frame && frame->page()) |
131 | console = &frame->page()->console(); |
132 | xmlSetStructuredErrorFunc(console, XSLTProcessor::parseErrorFunc); |
133 | xmlSetGenericErrorFunc(console, XSLTProcessor::genericErrorFunc); |
134 | |
135 | // We don't specify an encoding here. Neither Gecko nor WinIE respects |
136 | // the encoding specified in the HTTP headers. |
137 | xmlDocPtr doc = xmlReadMemory(data ? data->data() : nullptr, data ? data->size() : 0, (const char*)uri, 0, options); |
138 | |
139 | xmlSetStructuredErrorFunc(0, 0); |
140 | xmlSetGenericErrorFunc(0, 0); |
141 | |
142 | return doc; |
143 | } |
144 | case XSLT_LOAD_STYLESHEET: |
145 | return globalProcessor->xslStylesheet()->locateStylesheetSubResource(((xsltStylesheetPtr)ctxt)->doc, uri); |
146 | default: |
147 | break; |
148 | } |
149 | |
150 | return 0; |
151 | } |
152 | |
153 | static inline void setXSLTLoadCallBack(xsltDocLoaderFunc func, XSLTProcessor* processor, CachedResourceLoader* cachedResourceLoader) |
154 | { |
155 | xsltSetLoaderFunc(func); |
156 | globalProcessor = processor; |
157 | globalCachedResourceLoader = cachedResourceLoader; |
158 | } |
159 | |
160 | static int writeToStringBuilder(void* context, const char* buffer, int length) |
161 | { |
162 | StringBuilder& resultOutput = *static_cast<StringBuilder*>(context); |
163 | |
164 | // FIXME: Consider ways to make this more efficient by moving it into a |
165 | // StringBuilder::appendUTF8 function, and then optimizing to not need a |
166 | // Vector<UChar> and possibly optimize cases that can produce 8-bit Latin-1 |
167 | // strings, but that would need to be sophisticated about not processing |
168 | // trailing incomplete sequences and communicating that to the caller. |
169 | |
170 | Vector<UChar> outputBuffer(length); |
171 | |
172 | UBool error = false; |
173 | int inputOffset = 0; |
174 | int outputOffset = 0; |
175 | while (inputOffset < length) { |
176 | UChar32 character; |
177 | int nextInputOffset = inputOffset; |
178 | U8_NEXT(reinterpret_cast<const uint8_t*>(buffer), nextInputOffset, length, character); |
179 | if (character < 0) { |
180 | if (nextInputOffset == length) |
181 | break; |
182 | ASSERT_NOT_REACHED(); |
183 | return -1; |
184 | } |
185 | inputOffset = nextInputOffset; |
186 | U16_APPEND(outputBuffer.data(), outputOffset, length, character, error); |
187 | if (error) { |
188 | ASSERT_NOT_REACHED(); |
189 | return -1; |
190 | } |
191 | } |
192 | |
193 | resultOutput.append(outputBuffer.data(), outputOffset); |
194 | return inputOffset; |
195 | } |
196 | |
197 | static bool saveResultToString(xmlDocPtr resultDoc, xsltStylesheetPtr sheet, String& resultString) |
198 | { |
199 | xmlOutputBufferPtr outputBuf = xmlAllocOutputBuffer(nullptr); |
200 | if (!outputBuf) |
201 | return false; |
202 | |
203 | StringBuilder resultBuilder; |
204 | outputBuf->context = &resultBuilder; |
205 | outputBuf->writecallback = writeToStringBuilder; |
206 | |
207 | int retval = xsltSaveResultTo(outputBuf, resultDoc, sheet); |
208 | xmlOutputBufferClose(outputBuf); |
209 | if (retval < 0) |
210 | return false; |
211 | |
212 | // Workaround for <http://bugzilla.gnome.org/show_bug.cgi?id=495668>: libxslt appends an extra line feed to the result. |
213 | if (resultBuilder.length() > 0 && resultBuilder[resultBuilder.length() - 1] == '\n') |
214 | resultBuilder.resize(resultBuilder.length() - 1); |
215 | |
216 | resultString = resultBuilder.toString(); |
217 | |
218 | return true; |
219 | } |
220 | |
221 | static const char** xsltParamArrayFromParameterMap(XSLTProcessor::ParameterMap& parameters) |
222 | { |
223 | if (parameters.isEmpty()) |
224 | return 0; |
225 | |
226 | const char** parameterArray = (const char**)fastMalloc(((parameters.size() * 2) + 1) * sizeof(char*)); |
227 | |
228 | unsigned index = 0; |
229 | for (auto& parameter : parameters) { |
230 | parameterArray[index++] = fastStrDup(parameter.key.utf8().data()); |
231 | parameterArray[index++] = fastStrDup(parameter.value.utf8().data()); |
232 | } |
233 | parameterArray[index] = nullptr; |
234 | |
235 | return parameterArray; |
236 | } |
237 | |
238 | static void freeXsltParamArray(const char** params) |
239 | { |
240 | const char** temp = params; |
241 | if (!params) |
242 | return; |
243 | |
244 | while (*temp) { |
245 | fastFree((void*)*(temp++)); |
246 | fastFree((void*)*(temp++)); |
247 | } |
248 | fastFree(params); |
249 | } |
250 | |
251 | static xsltStylesheetPtr xsltStylesheetPointer(RefPtr<XSLStyleSheet>& cachedStylesheet, Node* stylesheetRootNode) |
252 | { |
253 | if (!cachedStylesheet && stylesheetRootNode) { |
254 | cachedStylesheet = XSLStyleSheet::createForXSLTProcessor(stylesheetRootNode->parentNode() ? stylesheetRootNode->parentNode() : stylesheetRootNode, |
255 | stylesheetRootNode->document().url().string(), |
256 | stylesheetRootNode->document().url()); // FIXME: Should we use baseURL here? |
257 | |
258 | // According to Mozilla documentation, the node must be a Document node, an xsl:stylesheet or xsl:transform element. |
259 | // But we just use text content regardless of node type. |
260 | cachedStylesheet->parseString(serializeFragment(*stylesheetRootNode, SerializedNodes::SubtreeIncludingNode)); |
261 | } |
262 | |
263 | if (!cachedStylesheet || !cachedStylesheet->document()) |
264 | return 0; |
265 | |
266 | return cachedStylesheet->compileStyleSheet(); |
267 | } |
268 | |
269 | static inline xmlDocPtr xmlDocPtrFromNode(Node& sourceNode, bool& shouldDelete) |
270 | { |
271 | Ref<Document> ownerDocument(sourceNode.document()); |
272 | bool sourceIsDocument = (&sourceNode == &ownerDocument.get()); |
273 | |
274 | xmlDocPtr sourceDoc = nullptr; |
275 | if (sourceIsDocument && ownerDocument->transformSource()) |
276 | sourceDoc = ownerDocument->transformSource()->platformSource(); |
277 | if (!sourceDoc) { |
278 | sourceDoc = xmlDocPtrForString(ownerDocument->cachedResourceLoader(), serializeFragment(sourceNode, SerializedNodes::SubtreeIncludingNode), |
279 | sourceIsDocument ? ownerDocument->url().string() : String()); |
280 | shouldDelete = sourceDoc; |
281 | } |
282 | return sourceDoc; |
283 | } |
284 | |
285 | static inline String resultMIMEType(xmlDocPtr resultDoc, xsltStylesheetPtr sheet) |
286 | { |
287 | // There are three types of output we need to be able to deal with: |
288 | // HTML (create an HTML document), XML (create an XML document), |
289 | // and text (wrap in a <pre> and create an XML document). |
290 | |
291 | const xmlChar* resultType = nullptr; |
292 | XSLT_GET_IMPORT_PTR(resultType, sheet, method); |
293 | if (!resultType && resultDoc->type == XML_HTML_DOCUMENT_NODE) |
294 | resultType = (const xmlChar*)"html" ; |
295 | |
296 | if (xmlStrEqual(resultType, (const xmlChar*)"html" )) |
297 | return "text/html" ; |
298 | if (xmlStrEqual(resultType, (const xmlChar*)"text" )) |
299 | return "text/plain" ; |
300 | |
301 | return "application/xml" ; |
302 | } |
303 | |
304 | bool XSLTProcessor::transformToString(Node& sourceNode, String& mimeType, String& resultString, String& resultEncoding) |
305 | { |
306 | Ref<Document> ownerDocument(sourceNode.document()); |
307 | |
308 | setXSLTLoadCallBack(docLoaderFunc, this, &ownerDocument->cachedResourceLoader()); |
309 | xsltStylesheetPtr sheet = xsltStylesheetPointer(m_stylesheet, m_stylesheetRootNode.get()); |
310 | if (!sheet) { |
311 | setXSLTLoadCallBack(nullptr, nullptr, nullptr); |
312 | m_stylesheet = nullptr; |
313 | return false; |
314 | } |
315 | m_stylesheet->clearDocuments(); |
316 | |
317 | #if OS(DARWIN) && !PLATFORM(GTK) |
318 | int origXsltMaxDepth = *xsltMaxDepth; |
319 | *xsltMaxDepth = 1000; |
320 | #else |
321 | int origXsltMaxDepth = xsltMaxDepth; |
322 | xsltMaxDepth = 1000; |
323 | #endif |
324 | |
325 | xmlChar* origMethod = sheet->method; |
326 | if (!origMethod && mimeType == "text/html" ) |
327 | sheet->method = reinterpret_cast<xmlChar*>(const_cast<char*>("html" )); |
328 | |
329 | bool success = false; |
330 | bool shouldFreeSourceDoc = false; |
331 | if (xmlDocPtr sourceDoc = xmlDocPtrFromNode(sourceNode, shouldFreeSourceDoc)) { |
332 | // The XML declaration would prevent parsing the result as a fragment, and it's not needed even for documents, |
333 | // as the result of this function is always immediately parsed. |
334 | sheet->omitXmlDeclaration = true; |
335 | |
336 | xsltTransformContextPtr transformContext = xsltNewTransformContext(sheet, sourceDoc); |
337 | registerXSLTExtensions(transformContext); |
338 | |
339 | xsltSecurityPrefsPtr securityPrefs = xsltNewSecurityPrefs(); |
340 | // Read permissions are checked by docLoaderFunc. |
341 | if (0 != xsltSetSecurityPrefs(securityPrefs, XSLT_SECPREF_WRITE_FILE, xsltSecurityForbid)) |
342 | CRASH(); |
343 | if (0 != xsltSetSecurityPrefs(securityPrefs, XSLT_SECPREF_CREATE_DIRECTORY, xsltSecurityForbid)) |
344 | CRASH(); |
345 | if (0 != xsltSetSecurityPrefs(securityPrefs, XSLT_SECPREF_WRITE_NETWORK, xsltSecurityForbid)) |
346 | CRASH(); |
347 | if (0 != xsltSetCtxtSecurityPrefs(securityPrefs, transformContext)) |
348 | CRASH(); |
349 | |
350 | // <http://bugs.webkit.org/show_bug.cgi?id=16077>: XSLT processor <xsl:sort> algorithm only compares by code point. |
351 | xsltSetCtxtSortFunc(transformContext, xsltUnicodeSortFunction); |
352 | |
353 | // This is a workaround for a bug in libxslt. |
354 | // The bug has been fixed in version 1.1.13, so once we ship that this can be removed. |
355 | if (!transformContext->globalVars) |
356 | transformContext->globalVars = xmlHashCreate(20); |
357 | |
358 | const char** params = xsltParamArrayFromParameterMap(m_parameters); |
359 | xsltQuoteUserParams(transformContext, params); |
360 | xmlDocPtr resultDoc = xsltApplyStylesheetUser(sheet, sourceDoc, 0, 0, 0, transformContext); |
361 | |
362 | xsltFreeTransformContext(transformContext); |
363 | xsltFreeSecurityPrefs(securityPrefs); |
364 | freeXsltParamArray(params); |
365 | |
366 | if (shouldFreeSourceDoc) |
367 | xmlFreeDoc(sourceDoc); |
368 | |
369 | if ((success = saveResultToString(resultDoc, sheet, resultString))) { |
370 | mimeType = resultMIMEType(resultDoc, sheet); |
371 | resultEncoding = reinterpret_cast<const char*>(resultDoc->encoding); |
372 | } |
373 | xmlFreeDoc(resultDoc); |
374 | } |
375 | |
376 | sheet->method = origMethod; |
377 | #if OS(DARWIN) && !PLATFORM(GTK) |
378 | *xsltMaxDepth = origXsltMaxDepth; |
379 | #else |
380 | xsltMaxDepth = origXsltMaxDepth; |
381 | #endif |
382 | setXSLTLoadCallBack(0, 0, 0); |
383 | xsltFreeStylesheet(sheet); |
384 | m_stylesheet = nullptr; |
385 | |
386 | return success; |
387 | } |
388 | |
389 | } // namespace WebCore |
390 | |
391 | #endif // ENABLE(XSLT) |
392 | |