| 1 | /* |
| 2 | Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) |
| 3 | Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com) |
| 4 | Copyright (C) 2006-2017 Apple Inc. All rights reserved. |
| 5 | |
| 6 | This library is free software; you can redistribute it and/or |
| 7 | modify it under the terms of the GNU Library General Public |
| 8 | License as published by the Free Software Foundation; either |
| 9 | version 2 of the License, or (at your option) any later version. |
| 10 | |
| 11 | This library is distributed in the hope that it will be useful, |
| 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 | Library General Public License for more details. |
| 15 | |
| 16 | You should have received a copy of the GNU Library General Public License |
| 17 | along with this library; see the file COPYING.LIB. If not, write to |
| 18 | the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| 19 | Boston, MA 02110-1301, USA. |
| 20 | |
| 21 | */ |
| 22 | |
| 23 | #pragma once |
| 24 | |
| 25 | #include "TextEncoding.h" |
| 26 | #include <wtf/RefCounted.h> |
| 27 | |
| 28 | namespace WebCore { |
| 29 | |
| 30 | class HTMLMetaCharsetParser; |
| 31 | class TextCodec; |
| 32 | |
| 33 | class TextResourceDecoder : public RefCounted<TextResourceDecoder> { |
| 34 | public: |
| 35 | enum EncodingSource { |
| 36 | DefaultEncoding, |
| 37 | AutoDetectedEncoding, |
| 38 | , |
| 39 | EncodingFromMetaTag, |
| 40 | EncodingFromCSSCharset, |
| 41 | , |
| 42 | UserChosenEncoding, |
| 43 | EncodingFromParentFrame |
| 44 | }; |
| 45 | |
| 46 | WEBCORE_EXPORT static Ref<TextResourceDecoder> create(const String& mimeType, const TextEncoding& defaultEncoding = { }, bool usesEncodingDetector = false); |
| 47 | WEBCORE_EXPORT ~TextResourceDecoder(); |
| 48 | |
| 49 | void setEncoding(const TextEncoding&, EncodingSource); |
| 50 | const TextEncoding& encoding() const { return m_encoding; } |
| 51 | const TextEncoding* encodingForURLParsing(); |
| 52 | |
| 53 | bool hasEqualEncodingForCharset(const String& charset) const; |
| 54 | |
| 55 | WEBCORE_EXPORT String decode(const char* data, size_t length); |
| 56 | WEBCORE_EXPORT String flush(); |
| 57 | |
| 58 | WEBCORE_EXPORT String decodeAndFlush(const char* data, size_t length); |
| 59 | |
| 60 | void setHintEncoding(const TextResourceDecoder* parentFrameDecoder); |
| 61 | |
| 62 | void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; } |
| 63 | bool sawError() const { return m_sawError; } |
| 64 | |
| 65 | private: |
| 66 | TextResourceDecoder(const String& mimeType, const TextEncoding& defaultEncoding, bool usesEncodingDetector); |
| 67 | |
| 68 | enum ContentType { PlainText, HTML, XML, CSS }; // PlainText only checks for BOM. |
| 69 | static ContentType determineContentType(const String& mimeType); |
| 70 | static const TextEncoding& defaultEncoding(ContentType, const TextEncoding& defaultEncoding); |
| 71 | |
| 72 | size_t checkForBOM(const char*, size_t); |
| 73 | bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer); |
| 74 | bool checkForHeadCharset(const char*, size_t, bool& movedDataToBuffer); |
| 75 | bool checkForMetaCharset(const char*, size_t); |
| 76 | void detectJapaneseEncoding(const char*, size_t); |
| 77 | bool shouldAutoDetect() const; |
| 78 | |
| 79 | ContentType m_contentType; |
| 80 | TextEncoding m_encoding; |
| 81 | std::unique_ptr<TextCodec> m_codec; |
| 82 | std::unique_ptr<HTMLMetaCharsetParser> m_charsetParser; |
| 83 | EncodingSource m_source { DefaultEncoding }; |
| 84 | const char* m_parentFrameAutoDetectedEncoding { nullptr }; |
| 85 | Vector<char> m_buffer; |
| 86 | bool m_checkedForBOM { false }; |
| 87 | bool m_checkedForCSSCharset { false }; |
| 88 | bool m_checkedForHeadCharset { false }; |
| 89 | bool m_useLenientXMLDecoding { false }; // Don't stop on XML decoding errors. |
| 90 | bool m_sawError { false }; |
| 91 | bool m_usesEncodingDetector { false }; |
| 92 | }; |
| 93 | |
| 94 | inline void TextResourceDecoder::setHintEncoding(const TextResourceDecoder* parentFrameDecoder) |
| 95 | { |
| 96 | if (parentFrameDecoder && parentFrameDecoder->m_source == AutoDetectedEncoding) |
| 97 | m_parentFrameAutoDetectedEncoding = parentFrameDecoder->encoding().name(); |
| 98 | } |
| 99 | |
| 100 | } // namespace WebCore |
| 101 | |