| 1 | /* | 
| 2 |  * Copyright (C) 1999 Lars Knoll (knoll@kde.org) | 
| 3 |  *           (C) 1999 Antti Koivisto (koivisto@kde.org) | 
| 4 |  *           (C) 2001 Dirk Mueller (mueller@kde.org) | 
| 5 |  * Copyright (C) 2004-2017 Apple Inc. All rights reserved. | 
| 6 |  *           (C) 2006 Alexey Proskuryakov (ap@nypop.com) | 
| 7 |  * Copyright (C) 2008 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) | 
| 8 |  * | 
| 9 |  * This library is free software; you can redistribute it and/or | 
| 10 |  * modify it under the terms of the GNU Library General Public | 
| 11 |  * License as published by the Free Software Foundation; either | 
| 12 |  * version 2 of the License, or (at your option) any later version. | 
| 13 |  * | 
| 14 |  * This library is distributed in the hope that it will be useful, | 
| 15 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 16 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
| 17 |  * Library General Public License for more details. | 
| 18 |  * | 
| 19 |  * You should have received a copy of the GNU Library General Public License | 
| 20 |  * along with this library; see the file COPYING.LIB.  If not, write to | 
| 21 |  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | 
| 22 |  * Boston, MA 02110-1301, USA. | 
| 23 |  */ | 
| 24 |  | 
| 25 | #include "config.h" | 
| 26 | #include "FormDataBuilder.h" | 
| 27 |  | 
| 28 | #include "Blob.h" | 
| 29 | #include "TextEncoding.h" | 
| 30 | #include <limits> | 
| 31 | #include <wtf/Assertions.h> | 
| 32 | #include <wtf/HexNumber.h> | 
| 33 | #include <wtf/RandomNumber.h> | 
| 34 | #include <wtf/text/CString.h> | 
| 35 | #include <wtf/text/StringView.h> | 
| 36 |  | 
| 37 | namespace WebCore { | 
| 38 |  | 
| 39 | namespace FormDataBuilder { | 
| 40 |  | 
| 41 | static inline void append(Vector<char>& buffer, char string) | 
| 42 | { | 
| 43 |     buffer.append(string); | 
| 44 | } | 
| 45 |  | 
| 46 | static inline void append(Vector<char>& buffer, const char* string) | 
| 47 | { | 
| 48 |     buffer.append(string, strlen(string)); | 
| 49 | } | 
| 50 |  | 
| 51 | static inline void append(Vector<char>& buffer, const CString& string) | 
| 52 | { | 
| 53 |     buffer.append(string.data(), string.length()); | 
| 54 | } | 
| 55 |  | 
| 56 | static inline void append(Vector<char>& buffer, const Vector<uint8_t>& string) | 
| 57 | { | 
| 58 |     buffer.appendVector(string); | 
| 59 | } | 
| 60 |  | 
| 61 | static void appendQuoted(Vector<char>& buffer, const Vector<uint8_t>& string) | 
| 62 | { | 
| 63 |     // Append a string as a quoted value, escaping quotes and line breaks. | 
| 64 |     // FIXME: Is it correct to use percent escaping here? When this code was originally written, | 
| 65 |     // other browsers were not encoding these characters, so someone should test servers or do | 
| 66 |     // research to find out if there is an encoding form that works well. | 
| 67 |     // FIXME: If we want to use percent escaping sensibly, we need to escape "%" characters too. | 
| 68 |     size_t size = string.size(); | 
| 69 |     for (size_t i = 0; i < size; ++i) { | 
| 70 |         auto character = string[i]; | 
| 71 |         switch (character) { | 
| 72 |         case 0xA: | 
| 73 |             append(buffer, "%0A" ); | 
| 74 |             break; | 
| 75 |         case 0xD: | 
| 76 |             append(buffer, "%0D" ); | 
| 77 |             break; | 
| 78 |         case '"': | 
| 79 |             append(buffer, "%22" ); | 
| 80 |             break; | 
| 81 |         default: | 
| 82 |             append(buffer, character); | 
| 83 |         } | 
| 84 |     } | 
| 85 | } | 
| 86 |  | 
| 87 | // https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer | 
| 88 | static void appendFormURLEncoded(Vector<char>& buffer, const uint8_t* string, size_t length) | 
| 89 | { | 
| 90 |     static const char safeCharacters[] = "-._*" ; | 
| 91 |     for (size_t i = 0; i < length; ++i) { | 
| 92 |         auto character = string[i]; | 
| 93 |         if (isASCIIAlphanumeric(character) || strchr(safeCharacters, character)) | 
| 94 |             append(buffer, character); | 
| 95 |         else if (character == ' ') | 
| 96 |             append(buffer, '+'); | 
| 97 |         else if (character == '\n' || (character == '\r' && (i + 1 >= length || string[i + 1] != '\n'))) | 
| 98 |             append(buffer, "%0D%0A" ); // FIXME: Unclear exactly where this rule about normalizing line endings to CRLF comes from. | 
| 99 |         else if (character != '\r') { | 
| 100 |             append(buffer, '%'); | 
| 101 |             appendByteAsHex(character, buffer); | 
| 102 |         } | 
| 103 |     } | 
| 104 | } | 
| 105 |  | 
| 106 | static void appendFormURLEncoded(Vector<char>& buffer, const Vector<uint8_t>& string) | 
| 107 | { | 
| 108 |     appendFormURLEncoded(buffer, string.data(), string.size()); | 
| 109 | } | 
| 110 |  | 
| 111 | Vector<char> generateUniqueBoundaryString() | 
| 112 | { | 
| 113 |     Vector<char> boundary; | 
| 114 |  | 
| 115 |     // The RFC 2046 spec says the alphanumeric characters plus the | 
| 116 |     // following characters are legal for boundaries:  '()+_,-./:=? | 
| 117 |     // However the following characters, though legal, cause some sites | 
| 118 |     // to fail: (),./:=+ | 
| 119 |     // Note that our algorithm makes it twice as much likely for 'A' or 'B' | 
| 120 |     // to appear in the boundary string, because 0x41 and 0x42 are present in | 
| 121 |     // the below array twice. | 
| 122 |     static const char alphaNumericEncodingMap[64] = { | 
| 123 |         0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, | 
| 124 |         0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, | 
| 125 |         0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, | 
| 126 |         0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, | 
| 127 |         0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, | 
| 128 |         0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, | 
| 129 |         0x77, 0x78, 0x79, 0x7A, 0x30, 0x31, 0x32, 0x33, | 
| 130 |         0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x41, 0x42 | 
| 131 |     }; | 
| 132 |  | 
| 133 |     // Start with an informative prefix. | 
| 134 |     append(boundary, "----WebKitFormBoundary" ); | 
| 135 |  | 
| 136 |     // Append 16 random 7bit ascii AlphaNumeric characters. | 
| 137 |     Vector<char> randomBytes; | 
| 138 |  | 
| 139 |     for (unsigned i = 0; i < 4; ++i) { | 
| 140 |         unsigned randomness = static_cast<unsigned>(randomNumber() * (std::numeric_limits<unsigned>::max() + 1.0)); | 
| 141 |         randomBytes.append(alphaNumericEncodingMap[(randomness >> 24) & 0x3F]); | 
| 142 |         randomBytes.append(alphaNumericEncodingMap[(randomness >> 16) & 0x3F]); | 
| 143 |         randomBytes.append(alphaNumericEncodingMap[(randomness >> 8) & 0x3F]); | 
| 144 |         randomBytes.append(alphaNumericEncodingMap[randomness & 0x3F]); | 
| 145 |     } | 
| 146 |  | 
| 147 |     boundary.appendVector(randomBytes); | 
| 148 |     boundary.append(0); // Add a 0 at the end so we can use this as a C-style string. | 
| 149 |     return boundary; | 
| 150 | } | 
| 151 |  | 
| 152 | void (Vector<char>& buffer, const CString& boundary, const Vector<uint8_t>& name) | 
| 153 | { | 
| 154 |     addBoundaryToMultiPartHeader(buffer, boundary); | 
| 155 |  | 
| 156 |     // FIXME: This loses data irreversibly if the input name includes characters you can't encode | 
| 157 |     // in the website's character set. | 
| 158 |     append(buffer, "Content-Disposition: form-data; name=\"" ); | 
| 159 |     appendQuoted(buffer, name); | 
| 160 |     append(buffer, '"'); | 
| 161 | } | 
| 162 |  | 
| 163 | void (Vector<char>& buffer, const CString& boundary, bool isLastBoundary) | 
| 164 | { | 
| 165 |     append(buffer, "--" ); | 
| 166 |     append(buffer, boundary); | 
| 167 |  | 
| 168 |     if (isLastBoundary) | 
| 169 |         append(buffer, "--" ); | 
| 170 |  | 
| 171 |     append(buffer, "\r\n" ); | 
| 172 | } | 
| 173 |  | 
| 174 | void (Vector<char>& buffer, const TextEncoding& encoding, const String& filename) | 
| 175 | { | 
| 176 |     // FIXME: This loses data irreversibly if the filename includes characters you can't encode | 
| 177 |     // in the website's character set. | 
| 178 |     append(buffer, "; filename=\"" ); | 
| 179 |     appendQuoted(buffer, encoding.encode(filename, UnencodableHandling::QuestionMarks)); | 
| 180 |     append(buffer, '"'); | 
| 181 | } | 
| 182 |  | 
| 183 | void (Vector<char>& buffer, const CString& mimeType) | 
| 184 | { | 
| 185 |     ASSERT(Blob::isNormalizedContentType(mimeType)); | 
| 186 |     append(buffer, "\r\nContent-Type: " ); | 
| 187 |     append(buffer, mimeType); | 
| 188 | } | 
| 189 |  | 
| 190 | void (Vector<char>& buffer) | 
| 191 | { | 
| 192 |     append(buffer, "\r\n\r\n" ); | 
| 193 | } | 
| 194 |  | 
| 195 | void addKeyValuePairAsFormData(Vector<char>& buffer, const Vector<uint8_t>& key, const Vector<uint8_t>& value, FormData::EncodingType encodingType) | 
| 196 | { | 
| 197 |     if (encodingType == FormData::TextPlain) { | 
| 198 |         if (!buffer.isEmpty()) | 
| 199 |             append(buffer, "\r\n" ); | 
| 200 |         append(buffer, key); | 
| 201 |         append(buffer, '='); | 
| 202 |         append(buffer, value); | 
| 203 |     } else { | 
| 204 |         if (!buffer.isEmpty()) | 
| 205 |             append(buffer, '&'); | 
| 206 |         appendFormURLEncoded(buffer, key); | 
| 207 |         append(buffer, '='); | 
| 208 |         appendFormURLEncoded(buffer, value); | 
| 209 |     } | 
| 210 | } | 
| 211 |  | 
| 212 | void encodeStringAsFormData(Vector<char>& buffer, const CString& string) | 
| 213 | { | 
| 214 |     appendFormURLEncoded(buffer, reinterpret_cast<const uint8_t*>(string.data()), string.length()); | 
| 215 | } | 
| 216 |  | 
| 217 | } | 
| 218 |  | 
| 219 | } | 
| 220 |  |