| 1 | /* |
| 2 | * This file is part of the XSL implementation. |
| 3 | * |
| 4 | * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple, Inc. All rights reserved. |
| 5 | * Copyright (C) 2005, 2006 Alexey Proskuryakov <ap@webkit.org> |
| 6 | * |
| 7 | * This library is free software; you can redistribute it and/or |
| 8 | * modify it under the terms of the GNU Library General Public |
| 9 | * License as published by the Free Software Foundation; either |
| 10 | * version 2 of the License, or (at your option) any later version. |
| 11 | * |
| 12 | * This library is distributed in the hope that it will be useful, |
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 15 | * Library General Public License for more details. |
| 16 | * |
| 17 | * You should have received a copy of the GNU Library General Public License |
| 18 | * along with this library; see the file COPYING.LIB. If not, write to |
| 19 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| 20 | * Boston, MA 02110-1301, USA. |
| 21 | */ |
| 22 | |
| 23 | #include "config.h" |
| 24 | |
| 25 | #if ENABLE(XSLT) |
| 26 | |
| 27 | #include "XSLTProcessor.h" |
| 28 | |
| 29 | #include "CachedResourceLoader.h" |
| 30 | #include "Document.h" |
| 31 | #include "Frame.h" |
| 32 | #include "FrameLoader.h" |
| 33 | #include "Page.h" |
| 34 | #include "PageConsoleClient.h" |
| 35 | #include "ResourceError.h" |
| 36 | #include "ResourceRequest.h" |
| 37 | #include "ResourceResponse.h" |
| 38 | #include "SecurityOrigin.h" |
| 39 | #include "SharedBuffer.h" |
| 40 | #include "TransformSource.h" |
| 41 | #include "XMLDocumentParser.h" |
| 42 | #include "XSLTExtensions.h" |
| 43 | #include "XSLTUnicodeSort.h" |
| 44 | #include "markup.h" |
| 45 | #include <libxslt/imports.h> |
| 46 | #include <libxslt/security.h> |
| 47 | #include <libxslt/variables.h> |
| 48 | #include <libxslt/xslt.h> |
| 49 | #include <libxslt/xsltutils.h> |
| 50 | #include <wtf/Assertions.h> |
| 51 | |
| 52 | #if OS(DARWIN) && !PLATFORM(GTK) |
| 53 | #include "SoftLinkLibxslt.h" |
| 54 | #endif |
| 55 | |
| 56 | namespace WebCore { |
| 57 | |
// Generic error callback; docLoaderFunc() in this file hands it to
// xmlSetGenericErrorFunc(). Every argument is ignored, so the reported
// message is silently dropped.
void XSLTProcessor::genericErrorFunc(void*, const char*, ...)
{
    // It would be nice to do something with this error message.
}
| 62 | |
| 63 | void XSLTProcessor::parseErrorFunc(void* userData, xmlError* error) |
| 64 | { |
| 65 | PageConsoleClient* console = static_cast<PageConsoleClient*>(userData); |
| 66 | if (!console) |
| 67 | return; |
| 68 | |
| 69 | MessageLevel level; |
| 70 | switch (error->level) { |
| 71 | case XML_ERR_NONE: |
| 72 | level = MessageLevel::Debug; |
| 73 | break; |
| 74 | case XML_ERR_WARNING: |
| 75 | level = MessageLevel::Warning; |
| 76 | break; |
| 77 | case XML_ERR_ERROR: |
| 78 | case XML_ERR_FATAL: |
| 79 | default: |
| 80 | level = MessageLevel::Error; |
| 81 | break; |
| 82 | } |
| 83 | |
| 84 | // xmlError->int2 is the column number of the error or 0 if N/A. |
| 85 | console->addMessage(MessageSource::XML, level, error->message, error->file, error->line, error->int2); |
| 86 | } |
| 87 | |
| 88 | // FIXME: There seems to be no way to control the ctxt pointer for loading here, thus we have globals. |
| 89 | static XSLTProcessor* globalProcessor = nullptr; |
| 90 | static CachedResourceLoader* globalCachedResourceLoader = nullptr; |
| 91 | static xmlDocPtr docLoaderFunc(const xmlChar* uri, |
| 92 | xmlDictPtr, |
| 93 | int options, |
| 94 | void* ctxt, |
| 95 | xsltLoadType type) |
| 96 | { |
| 97 | if (!globalProcessor) |
| 98 | return 0; |
| 99 | |
| 100 | switch (type) { |
| 101 | case XSLT_LOAD_DOCUMENT: { |
| 102 | xsltTransformContextPtr context = (xsltTransformContextPtr)ctxt; |
| 103 | xmlChar* base = xmlNodeGetBase(context->document->doc, context->node); |
| 104 | URL url(URL({ }, reinterpret_cast<const char*>(base)), reinterpret_cast<const char*>(uri)); |
| 105 | xmlFree(base); |
| 106 | ResourceError error; |
| 107 | ResourceResponse response; |
| 108 | |
| 109 | RefPtr<SharedBuffer> data; |
| 110 | |
| 111 | bool requestAllowed = globalCachedResourceLoader->frame() && globalCachedResourceLoader->document()->securityOrigin().canRequest(url); |
| 112 | if (requestAllowed) { |
| 113 | FetchOptions options; |
| 114 | options.mode = FetchOptions::Mode::SameOrigin; |
| 115 | options.credentials = FetchOptions::Credentials::Include; |
| 116 | globalCachedResourceLoader->frame()->loader().loadResourceSynchronously(url, ClientCredentialPolicy::MayAskClientForCredentials, options, { }, error, response, data); |
| 117 | if (error.isNull()) |
| 118 | requestAllowed = globalCachedResourceLoader->document()->securityOrigin().canRequest(response.url()); |
| 119 | else if (data) |
| 120 | data = nullptr; |
| 121 | } |
| 122 | if (!requestAllowed) { |
| 123 | if (data) |
| 124 | data = nullptr; |
| 125 | globalCachedResourceLoader->printAccessDeniedMessage(url); |
| 126 | } |
| 127 | |
| 128 | PageConsoleClient* console = nullptr; |
| 129 | Frame* frame = globalProcessor->xslStylesheet()->ownerDocument()->frame(); |
| 130 | if (frame && frame->page()) |
| 131 | console = &frame->page()->console(); |
| 132 | xmlSetStructuredErrorFunc(console, XSLTProcessor::parseErrorFunc); |
| 133 | xmlSetGenericErrorFunc(console, XSLTProcessor::genericErrorFunc); |
| 134 | |
| 135 | // We don't specify an encoding here. Neither Gecko nor WinIE respects |
| 136 | // the encoding specified in the HTTP headers. |
| 137 | xmlDocPtr doc = xmlReadMemory(data ? data->data() : nullptr, data ? data->size() : 0, (const char*)uri, 0, options); |
| 138 | |
| 139 | xmlSetStructuredErrorFunc(0, 0); |
| 140 | xmlSetGenericErrorFunc(0, 0); |
| 141 | |
| 142 | return doc; |
| 143 | } |
| 144 | case XSLT_LOAD_STYLESHEET: |
| 145 | return globalProcessor->xslStylesheet()->locateStylesheetSubResource(((xsltStylesheetPtr)ctxt)->doc, uri); |
| 146 | default: |
| 147 | break; |
| 148 | } |
| 149 | |
| 150 | return 0; |
| 151 | } |
| 152 | |
| 153 | static inline void setXSLTLoadCallBack(xsltDocLoaderFunc func, XSLTProcessor* processor, CachedResourceLoader* cachedResourceLoader) |
| 154 | { |
| 155 | xsltSetLoaderFunc(func); |
| 156 | globalProcessor = processor; |
| 157 | globalCachedResourceLoader = cachedResourceLoader; |
| 158 | } |
| 159 | |
| 160 | static int writeToStringBuilder(void* context, const char* buffer, int length) |
| 161 | { |
| 162 | StringBuilder& resultOutput = *static_cast<StringBuilder*>(context); |
| 163 | |
| 164 | // FIXME: Consider ways to make this more efficient by moving it into a |
| 165 | // StringBuilder::appendUTF8 function, and then optimizing to not need a |
| 166 | // Vector<UChar> and possibly optimize cases that can produce 8-bit Latin-1 |
| 167 | // strings, but that would need to be sophisticated about not processing |
| 168 | // trailing incomplete sequences and communicating that to the caller. |
| 169 | |
| 170 | Vector<UChar> outputBuffer(length); |
| 171 | |
| 172 | UBool error = false; |
| 173 | int inputOffset = 0; |
| 174 | int outputOffset = 0; |
| 175 | while (inputOffset < length) { |
| 176 | UChar32 character; |
| 177 | int nextInputOffset = inputOffset; |
| 178 | U8_NEXT(reinterpret_cast<const uint8_t*>(buffer), nextInputOffset, length, character); |
| 179 | if (character < 0) { |
| 180 | if (nextInputOffset == length) |
| 181 | break; |
| 182 | ASSERT_NOT_REACHED(); |
| 183 | return -1; |
| 184 | } |
| 185 | inputOffset = nextInputOffset; |
| 186 | U16_APPEND(outputBuffer.data(), outputOffset, length, character, error); |
| 187 | if (error) { |
| 188 | ASSERT_NOT_REACHED(); |
| 189 | return -1; |
| 190 | } |
| 191 | } |
| 192 | |
| 193 | resultOutput.append(outputBuffer.data(), outputOffset); |
| 194 | return inputOffset; |
| 195 | } |
| 196 | |
| 197 | static bool saveResultToString(xmlDocPtr resultDoc, xsltStylesheetPtr sheet, String& resultString) |
| 198 | { |
| 199 | xmlOutputBufferPtr outputBuf = xmlAllocOutputBuffer(nullptr); |
| 200 | if (!outputBuf) |
| 201 | return false; |
| 202 | |
| 203 | StringBuilder resultBuilder; |
| 204 | outputBuf->context = &resultBuilder; |
| 205 | outputBuf->writecallback = writeToStringBuilder; |
| 206 | |
| 207 | int retval = xsltSaveResultTo(outputBuf, resultDoc, sheet); |
| 208 | xmlOutputBufferClose(outputBuf); |
| 209 | if (retval < 0) |
| 210 | return false; |
| 211 | |
| 212 | // Workaround for <http://bugzilla.gnome.org/show_bug.cgi?id=495668>: libxslt appends an extra line feed to the result. |
| 213 | if (resultBuilder.length() > 0 && resultBuilder[resultBuilder.length() - 1] == '\n') |
| 214 | resultBuilder.resize(resultBuilder.length() - 1); |
| 215 | |
| 216 | resultString = resultBuilder.toString(); |
| 217 | |
| 218 | return true; |
| 219 | } |
| 220 | |
| 221 | static const char** xsltParamArrayFromParameterMap(XSLTProcessor::ParameterMap& parameters) |
| 222 | { |
| 223 | if (parameters.isEmpty()) |
| 224 | return 0; |
| 225 | |
| 226 | const char** parameterArray = (const char**)fastMalloc(((parameters.size() * 2) + 1) * sizeof(char*)); |
| 227 | |
| 228 | unsigned index = 0; |
| 229 | for (auto& parameter : parameters) { |
| 230 | parameterArray[index++] = fastStrDup(parameter.key.utf8().data()); |
| 231 | parameterArray[index++] = fastStrDup(parameter.value.utf8().data()); |
| 232 | } |
| 233 | parameterArray[index] = nullptr; |
| 234 | |
| 235 | return parameterArray; |
| 236 | } |
| 237 | |
| 238 | static void freeXsltParamArray(const char** params) |
| 239 | { |
| 240 | const char** temp = params; |
| 241 | if (!params) |
| 242 | return; |
| 243 | |
| 244 | while (*temp) { |
| 245 | fastFree((void*)*(temp++)); |
| 246 | fastFree((void*)*(temp++)); |
| 247 | } |
| 248 | fastFree(params); |
| 249 | } |
| 250 | |
| 251 | static xsltStylesheetPtr xsltStylesheetPointer(RefPtr<XSLStyleSheet>& cachedStylesheet, Node* stylesheetRootNode) |
| 252 | { |
| 253 | if (!cachedStylesheet && stylesheetRootNode) { |
| 254 | cachedStylesheet = XSLStyleSheet::createForXSLTProcessor(stylesheetRootNode->parentNode() ? stylesheetRootNode->parentNode() : stylesheetRootNode, |
| 255 | stylesheetRootNode->document().url().string(), |
| 256 | stylesheetRootNode->document().url()); // FIXME: Should we use baseURL here? |
| 257 | |
| 258 | // According to Mozilla documentation, the node must be a Document node, an xsl:stylesheet or xsl:transform element. |
| 259 | // But we just use text content regardless of node type. |
| 260 | cachedStylesheet->parseString(serializeFragment(*stylesheetRootNode, SerializedNodes::SubtreeIncludingNode)); |
| 261 | } |
| 262 | |
| 263 | if (!cachedStylesheet || !cachedStylesheet->document()) |
| 264 | return 0; |
| 265 | |
| 266 | return cachedStylesheet->compileStyleSheet(); |
| 267 | } |
| 268 | |
| 269 | static inline xmlDocPtr xmlDocPtrFromNode(Node& sourceNode, bool& shouldDelete) |
| 270 | { |
| 271 | Ref<Document> ownerDocument(sourceNode.document()); |
| 272 | bool sourceIsDocument = (&sourceNode == &ownerDocument.get()); |
| 273 | |
| 274 | xmlDocPtr sourceDoc = nullptr; |
| 275 | if (sourceIsDocument && ownerDocument->transformSource()) |
| 276 | sourceDoc = ownerDocument->transformSource()->platformSource(); |
| 277 | if (!sourceDoc) { |
| 278 | sourceDoc = xmlDocPtrForString(ownerDocument->cachedResourceLoader(), serializeFragment(sourceNode, SerializedNodes::SubtreeIncludingNode), |
| 279 | sourceIsDocument ? ownerDocument->url().string() : String()); |
| 280 | shouldDelete = sourceDoc; |
| 281 | } |
| 282 | return sourceDoc; |
| 283 | } |
| 284 | |
| 285 | static inline String resultMIMEType(xmlDocPtr resultDoc, xsltStylesheetPtr sheet) |
| 286 | { |
| 287 | // There are three types of output we need to be able to deal with: |
| 288 | // HTML (create an HTML document), XML (create an XML document), |
| 289 | // and text (wrap in a <pre> and create an XML document). |
| 290 | |
| 291 | const xmlChar* resultType = nullptr; |
| 292 | XSLT_GET_IMPORT_PTR(resultType, sheet, method); |
| 293 | if (!resultType && resultDoc->type == XML_HTML_DOCUMENT_NODE) |
| 294 | resultType = (const xmlChar*)"html" ; |
| 295 | |
| 296 | if (xmlStrEqual(resultType, (const xmlChar*)"html" )) |
| 297 | return "text/html" ; |
| 298 | if (xmlStrEqual(resultType, (const xmlChar*)"text" )) |
| 299 | return "text/plain" ; |
| 300 | |
| 301 | return "application/xml" ; |
| 302 | } |
| 303 | |
// Applies this processor's stylesheet to sourceNode and serializes the result.
//
// sourceNode      node (or document) to transform.
// mimeType        in/out: when the stylesheet declares no output method and
//                 this is "text/html" on entry, the "html" method is used for
//                 the transform; on success it is overwritten with the MIME
//                 type of the result.
// resultString    out: the serialized result; written only on success.
// resultEncoding  out: the encoding recorded on the result document; written
//                 only on success.
//
// Returns false when the stylesheet cannot be compiled, no source document can
// be obtained for sourceNode, or the result cannot be serialized. m_stylesheet
// is reset to null before returning on every path.
bool XSLTProcessor::transformToString(Node& sourceNode, String& mimeType, String& resultString, String& resultEncoding)
{
    Ref<Document> ownerDocument(sourceNode.document());

    // Install the loader callback before compiling the stylesheet; the callback
    // reaches this processor and the resource loader through file-level globals.
    setXSLTLoadCallBack(docLoaderFunc, this, &ownerDocument->cachedResourceLoader());
    xsltStylesheetPtr sheet = xsltStylesheetPointer(m_stylesheet, m_stylesheetRootNode.get());
    if (!sheet) {
        setXSLTLoadCallBack(nullptr, nullptr, nullptr);
        m_stylesheet = nullptr;
        return false;
    }
    m_stylesheet->clearDocuments();

    // Temporarily set libxslt's global recursion limit to 1000; the previous
    // value is restored below. In the first configuration xsltMaxDepth is
    // accessed through a pointer rather than as a plain variable.
#if OS(DARWIN) && !PLATFORM(GTK)
    int origXsltMaxDepth = *xsltMaxDepth;
    *xsltMaxDepth = 1000;
#else
    int origXsltMaxDepth = xsltMaxDepth;
    xsltMaxDepth = 1000;
#endif

    // If the stylesheet names no output method but the caller asked for HTML,
    // force the "html" method for this transform. The original pointer is
    // remembered so it can be put back before the stylesheet is freed.
    xmlChar* origMethod = sheet->method;
    if (!origMethod && mimeType == "text/html")
        sheet->method = reinterpret_cast<xmlChar*>(const_cast<char*>("html"));

    bool success = false;
    bool shouldFreeSourceDoc = false;
    if (xmlDocPtr sourceDoc = xmlDocPtrFromNode(sourceNode, shouldFreeSourceDoc)) {
        // The XML declaration would prevent parsing the result as a fragment, and it's not needed even for documents,
        // as the result of this function is always immediately parsed.
        sheet->omitXmlDeclaration = true;

        xsltTransformContextPtr transformContext = xsltNewTransformContext(sheet, sourceDoc);
        registerXSLTExtensions(transformContext);

        // Forbid every write-type operation for this transform. Failing to set
        // up any of these preferences is treated as fatal.
        xsltSecurityPrefsPtr securityPrefs = xsltNewSecurityPrefs();
        // Read permissions are checked by docLoaderFunc.
        if (0 != xsltSetSecurityPrefs(securityPrefs, XSLT_SECPREF_WRITE_FILE, xsltSecurityForbid))
            CRASH();
        if (0 != xsltSetSecurityPrefs(securityPrefs, XSLT_SECPREF_CREATE_DIRECTORY, xsltSecurityForbid))
            CRASH();
        if (0 != xsltSetSecurityPrefs(securityPrefs, XSLT_SECPREF_WRITE_NETWORK, xsltSecurityForbid))
            CRASH();
        if (0 != xsltSetCtxtSecurityPrefs(securityPrefs, transformContext))
            CRASH();

        // <http://bugs.webkit.org/show_bug.cgi?id=16077>: XSLT processor <xsl:sort> algorithm only compares by code point.
        xsltSetCtxtSortFunc(transformContext, xsltUnicodeSortFunction);

        // This is a workaround for a bug in libxslt.
        // The bug has been fixed in version 1.1.13, so once we ship that this can be removed.
        if (!transformContext->globalVars)
            transformContext->globalVars = xmlHashCreate(20);

        // The parameters are registered on the context here, which is why the
        // params argument of xsltApplyStylesheetUser() below is left as 0.
        const char** params = xsltParamArrayFromParameterMap(m_parameters);
        xsltQuoteUserParams(transformContext, params);
        xmlDocPtr resultDoc = xsltApplyStylesheetUser(sheet, sourceDoc, 0, 0, 0, transformContext);

        // Release the per-transform state; only resultDoc is needed from here on.
        xsltFreeTransformContext(transformContext);
        xsltFreeSecurityPrefs(securityPrefs);
        freeXsltParamArray(params);

        // The source document is freed only if xmlDocPtrFromNode() created it.
        if (shouldFreeSourceDoc)
            xmlFreeDoc(sourceDoc);

        // The outputs are filled in only when serialization succeeded.
        if ((success = saveResultToString(resultDoc, sheet, resultString))) {
            mimeType = resultMIMEType(resultDoc, sheet);
            resultEncoding = reinterpret_cast<const char*>(resultDoc->encoding);
        }
        xmlFreeDoc(resultDoc);
    }

    // Undo the temporary changes: put back the stylesheet's own method pointer
    // (the forced value points at a string literal), restore the depth limit,
    // and uninstall the loader callback before freeing the compiled stylesheet.
    sheet->method = origMethod;
#if OS(DARWIN) && !PLATFORM(GTK)
    *xsltMaxDepth = origXsltMaxDepth;
#else
    xsltMaxDepth = origXsltMaxDepth;
#endif
    setXSLTLoadCallBack(0, 0, 0);
    xsltFreeStylesheet(sheet);
    m_stylesheet = nullptr;

    return success;
}
| 388 | |
| 389 | } // namespace WebCore |
| 390 | |
| 391 | #endif // ENABLE(XSLT) |
| 392 | |