HTTPParsers.cpp source code [webkit/Source/WebCore/platform/network/HTTPParsers.cpp]

1	/*
2	* Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
3	* Copyright (C) 2006-2017 Apple Inc. All rights reserved.
4	* Copyright (C) 2009 Torch Mobile Inc. http://www.torchmobile.com/
5	* Copyright (C) 2009 Google Inc. All rights reserved.
6	* Copyright (C) 2011 Apple Inc. All Rights Reserved.
7	*
8	* Redistribution and use in source and binary forms, with or without
9	* modification, are permitted provided that the following conditions
10	* are met:
11	*
12	* 1. Redistributions of source code must retain the above copyright
13	* notice, this list of conditions and the following disclaimer.
14	* 2. Redistributions in binary form must reproduce the above copyright
15	* notice, this list of conditions and the following disclaimer in the
16	* documentation and/or other materials provided with the distribution.
17	* 3. Neither the name of Apple Inc. ("Apple") nor the names of
18	* its contributors may be used to endorse or promote products derived
19	* from this software without specific prior written permission.
20	*
21	* THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
22	* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23	* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24	* DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
25	* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26	* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27	* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
28	* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31	*/
32
33	#include "config.h"
34	#include "HTTPParsers.h"
35
36	#include "HTTPHeaderNames.h"
37	#include <wtf/DateMath.h>
38	#include <wtf/Language.h>
39	#include <wtf/NeverDestroyed.h>
40	#include <wtf/Optional.h>
41	#include <wtf/text/StringBuilder.h>
42	#include <wtf/unicode/CharacterNames.h>
43
44
45	namespace WebCore {
46
47	// true if there is more to parse, after incrementing pos past whitespace.
48	// Note: Might return pos == str.length()
49	static inline bool skipWhiteSpace(const String& str, unsigned& pos)
50	{
51	unsigned len = str.length();
52
53	while (pos < len && (str [pos] == `'\t'` \|\| str [pos] == `' '`))
54	++pos;
55
56	return pos < len;
57	}
58
59	// Returns true if the function can match the whole token (case insensitive)
60	// incrementing pos on match, otherwise leaving pos unchanged.
61	// Note: Might return pos == str.length()
62	static inline bool skipToken(const String& str, unsigned& pos, const char* token)
63	{
64	unsigned len = str.length();
65	unsigned current = pos;
66
67	while (current < len && *token) {
68	if (toASCIILower(str [current]) != *token++)
69	return false;
70	++current;
71	}
72
73	if (*token)
74	return false;
75
76	pos = current;
77	return true;
78	}
79
80	// True if the expected equals sign is seen and there is more to follow.
81	static inline bool skipEquals(const String& str, unsigned &pos)
82	{
83	return skipWhiteSpace(str, pos) && str [pos++] == `'='` && skipWhiteSpace(str, pos);
84	}
85
86	// True if a value present, incrementing pos to next space or semicolon, if any.
87	// Note: might return pos == str.length().
88	static inline bool skipValue(const String& str, unsigned& pos)
89	{
90	unsigned start = pos;
91	unsigned len = str.length();
92	while (pos < len) {
93	if (str [pos] == `' '` \|\| str [pos] == `'\t'` \|\| str [pos] == `';'`)
94	break;
95	++pos;
96	}
97	return pos != start;
98	}
99
100	// See RFC 7230, Section 3.1.2.
101	bool isValidReasonPhrase(const String& value)
102	{
103	for (unsigned i = `0`; i < value.length(); ++i) {
104	UChar c = value [i];
105	if (c == `0x7F` \|\| c > `0xFF` \|\| (c < `0x20` && c != `'\t'`))
106	return false;
107	}
108	return true;
109	}
110
111	// See https://fetch.spec.whatwg.org/#concept-header
112	bool isValidHTTPHeaderValue(const String& value)
113	{
114	UChar c = value [`0`];
115	if (c == `' '` \|\| c == `'\t'`)
116	return false;
117	c = value [value.length() - `1`];
118	if (c == `' '` \|\| c == `'\t'`)
119	return false;
120	for (unsigned i = `0`; i < value.length(); ++i) {
121	c = value [i];
122	ASSERT(c <= `0xFF`);
123	if (c == `0x00` \|\| c == `0x0A` \|\| c == `0x0D`)
124	return false;
125	}
126	return true;
127	}
128
129	// See RFC 7230, Section 3.2.6.
130	static bool isDelimiterCharacter(const UChar c)
131	{
132	// DQUOTE and "(),/:;<=>?@[\]{}"
133	return (c == `'"'` \|\| c == `'('` \|\| c == `')'` \|\| c == `','` \|\| c == `'/'` \|\| c == `':'` \|\| c == `';'`
134	\|\| c == `'<'` \|\| c == `'='` \|\| c == `'>'` \|\| c == `'?'` \|\| c == `'@'` \|\| c == `'['` \|\| c == `'\\'`
135	\|\| c == `']'` \|\| c == `'{'` \|\| c == `'}'`);
136	}
137
138	// See RFC 7231, Section 5.3.2.
139	bool isValidAcceptHeaderValue(const String& value)
140	{
141	for (unsigned i = `0`; i < value.length(); ++i) {
142	UChar c = value [i];
143
144	// First check for alphanumeric for performance reasons then whitelist four delimiter characters.
145	if (isASCIIAlphanumeric(c) \|\| c == `','` \|\| c == `'/'` \|\| c == `';'` \|\| c == `'='`)
146	continue;
147
148	ASSERT(c <= `0xFF`);
149	if (c == `0x7F` \|\| (c < `0x20` && c != `'\t'`))
150	return false;
151
152	if (isDelimiterCharacter(c))
153	return false;
154	}
155
156	return true;
157	}
158
159	// See RFC 7231, Section 5.3.5 and 3.1.3.2.
160	bool isValidLanguageHeaderValue(const String& value)
161	{
162	for (unsigned i = `0`; i < value.length(); ++i) {
163	UChar c = value [i];
164	if (isASCIIAlphanumeric(c) \|\| c == `' '` \|\| c == `'*'` \|\| c == `','` \|\| c == `'-'` \|\| c == `'.'` \|\| c == `';'` \|\| c == `'='`)
165	continue;
166	return false;
167	}
168
169	// FIXME: Validate further by splitting into language tags and optional quality
170	// values (q=) and then check each language tag.
171	// Language tags https://tools.ietf.org/html/rfc7231#section-3.1.3.1
172	// Language tag syntax https://tools.ietf.org/html/bcp47#section-2.1
173	return true;
174	}
175
176	// See RFC 7230, Section 3.2.6.
177	bool isValidHTTPToken(const String& value)
178	{
179	if (value.isEmpty())
180	return false;
181	auto valueStringView = StringView (value);
182	for (UChar c : valueStringView.codeUnits()) {
183	if (c <= `0x20` \|\| c >= `0x7F`
184	\|\| c == `'('` \|\| c == `')'` \|\| c == `'<'` \|\| c == `'>'` \|\| c == `'@'`
185	\|\| c == `','` \|\| c == `';'` \|\| c == `':'` \|\| c == `'\\'` \|\| c == `'"'`
186	\|\| c == `'/'` \|\| c == `'['` \|\| c == `']'` \|\| c == `'?'` \|\| c == `'='`
187	\|\| c == `'{'` \|\| c == `'}'`)
188	return false;
189	}
190	return true;
191	}
192
193	static const size_t maxInputSampleSize = `128`;
194	static String trimInputSample(const char* p, size_t length)
195	{
196	String s = String (p, std::min<size_t>(length, maxInputSampleSize));
197	if (length > maxInputSampleSize)
198	s.append(horizontalEllipsis);
199	return s;
200	}
201
202	bool parseHTTPRefresh(const String& refresh, double& delay, String& url)
203	{
204	unsigned len = refresh.length();
205	unsigned pos = `0`;
206
207	if (!skipWhiteSpace(refresh, pos))
208	return false;
209
210	while (pos != len && refresh [pos] != `','` && refresh [pos] != `';'`)
211	++pos;
212
213	if (pos == len) { // no URL
214	url = String ();
215	bool ok;
216	delay = refresh.stripWhiteSpace().toDouble(&ok);
217	return ok;
218	} else {
219	bool ok;
220	delay = refresh.left(pos).stripWhiteSpace().toDouble(&ok);
221	if (!ok)
222	return false;
223
224	++pos;
225	skipWhiteSpace(refresh, pos);
226	unsigned urlStartPos = pos;
227	if (refresh.findIgnoringASCIICase("url", urlStartPos) == urlStartPos) {
228	urlStartPos += `3`;
229	skipWhiteSpace(refresh, urlStartPos);
230	if (refresh [urlStartPos] == `'='`) {
231	++urlStartPos;
232	skipWhiteSpace(refresh, urlStartPos);
233	} else
234	urlStartPos = pos; // e.g. "Refresh: 0; url.html"
235	}
236
237	unsigned urlEndPos = len;
238
239	if (refresh [urlStartPos] == `'"'` \|\| refresh [urlStartPos] == `'\''`) {
240	UChar quotationMark = refresh [urlStartPos];
241	urlStartPos++;
242	while (urlEndPos > urlStartPos) {
243	urlEndPos--;
244	if (refresh [urlEndPos] == quotationMark)
245	break;
246	}
247
248	// https://bugs.webkit.org/show_bug.cgi?id=27868
249	// Sometimes there is no closing quote for the end of the URL even though there was an opening quote.
250	// If we looped over the entire alleged URL string back to the opening quote, just use everything
251	// after the opening quote instead.
252	if (urlEndPos == urlStartPos)
253	urlEndPos = len;
254	}
255
256	url = refresh.substring(urlStartPos, urlEndPos - urlStartPos).stripWhiteSpace();
257	return true;
258	}
259	}
260
261	Optional<WallTime> parseHTTPDate(const String& value)
262	{
263	double dateInMillisecondsSinceEpoch = parseDateFromNullTerminatedCharacters(value.utf8().data());
264	if (!std::isfinite(dateInMillisecondsSinceEpoch))
265	return WTF::nullopt;
266	// This assumes system_clock epoch equals Unix epoch which is true for all implementations but unspecified.
267	// FIXME: The parsing function should be switched to WallTime too.
268	return WallTime::fromRawSeconds(dateInMillisecondsSinceEpoch / `1000.0`);
269	}
270
271	// FIXME: This function doesn't comply with RFC 6266.
272	// For example, this function doesn't handle the interaction between " and ;
273	// that arises from quoted-string, nor does this function properly unquote
274	// attribute values. Further this function appears to process parameter names
275	// in a case-sensitive manner. (There are likely other bugs as well.)
276	String filenameFromHTTPContentDisposition(const String& value)
277	{
278	for (auto& keyValuePair : value.split(`';'`)) {
279	size_t valueStartPos = keyValuePair.find(`'='`);
280	if (valueStartPos == notFound)
281	continue;
282
283	String key = keyValuePair.left(valueStartPos).stripWhiteSpace();
284
285	if (key.isEmpty() \|\| key != "filename")
286	continue;
287
288	String value = keyValuePair.substring(valueStartPos + `1`).stripWhiteSpace();
289
290	// Remove quotes if there are any
291	if (value [`0`] == `'\"'`)
292	value = value.substring(`1`, value.length() - `2`);
293
294	return value;
295	}
296
297	return String ();
298	}
299
300	String extractMIMETypeFromMediaType(const String& mediaType)
301	{
302	unsigned position = `0`;
303	unsigned length = mediaType.length();
304
305	for (; position < length; ++position) {
306	UChar c = mediaType [position];
307	if (c != `'\t'` && c != `' '`)
308	break;
309	}
310
311	if (position == length)
312	return mediaType;
313
314	unsigned typeStart = position;
315
316	unsigned typeEnd = position;
317	for (; position < length; ++position) {
318	UChar c = mediaType [position];
319
320	// While RFC 2616 does not allow it, other browsers allow multiple values in the HTTP media
321	// type header field, Content-Type. In such cases, the media type string passed here may contain
322	// the multiple values separated by commas. For now, this code ignores text after the first comma,
323	// which prevents it from simply failing to parse such types altogether. Later for better
324	// compatibility we could consider using the first or last valid MIME type instead.
325	// See https://bugs.webkit.org/show_bug.cgi?id=25352 for more discussion.
326	if (c == `','`)
327	break;
328
329	if (c == `'\t'` \|\| c == `' '` \|\| c == `';'`)
330	break;
331
332	typeEnd = position + `1`;
333	}
334
335	return mediaType.substring(typeStart, typeEnd - typeStart);
336	}
337
338	String extractCharsetFromMediaType(const String& mediaType)
339	{
340	unsigned int pos, len;
341	findCharsetInMediaType(mediaType, pos, len);
342	return mediaType.substring(pos, len);
343	}
344
345	void findCharsetInMediaType(const String& mediaType, unsigned int& charsetPos, unsigned int& charsetLen, unsigned int start)
346	{
347	charsetPos = start;
348	charsetLen = `0`;
349
350	size_t pos = start;
351	unsigned length = mediaType.length();
352
353	while (pos < length) {
354	pos = mediaType.findIgnoringASCIICase("charset", pos);
355	if (pos == notFound \|\| pos == `0`) {
356	charsetLen = `0`;
357	return;
358	}
359
360	// is what we found a beginning of a word?
361	if (mediaType [pos-`1`] > `' '` && mediaType [pos-`1`] != `';'`) {
362	pos += `7`;
363	continue;
364	}
365
366	pos += `7`;
367
368	// skip whitespace
369	while (pos != length && mediaType [pos] <= `' '`)
370	++pos;
371
372	if (mediaType [pos++] != `'='`) // this "charset" substring wasn't a parameter name, but there may be others
373	continue;
374
375	while (pos != length && (mediaType [pos] <= `' '` \|\| mediaType [pos] == `'"'` \|\| mediaType [pos] == `'\''`))
376	++pos;
377
378	// we don't handle spaces within quoted parameter values, because charset names cannot have any
379	unsigned endpos = pos;
380	while (pos != length && mediaType [endpos] > `' '` && mediaType [endpos] != `'"'` && mediaType [endpos] != `'\''` && mediaType [endpos] != `';'`)
381	++endpos;
382
383	charsetPos = pos;
384	charsetLen = endpos - pos;
385	return;
386	}
387	}
388
389	XSSProtectionDisposition parseXSSProtectionHeader(const String& header, String& failureReason, unsigned& failurePosition, String& reportURL)
390	{
391	static NeverDestroyed<String> failureReasonInvalidToggle(MAKE_STATIC_STRING_IMPL("expected 0 or 1"));
392	static NeverDestroyed<String> failureReasonInvalidSeparator(MAKE_STATIC_STRING_IMPL("expected semicolon"));
393	static NeverDestroyed<String> failureReasonInvalidEquals(MAKE_STATIC_STRING_IMPL("expected equals sign"));
394	static NeverDestroyed<String> failureReasonInvalidMode(MAKE_STATIC_STRING_IMPL("invalid mode directive"));
395	static NeverDestroyed<String> failureReasonInvalidReport(MAKE_STATIC_STRING_IMPL("invalid report directive"));
396	static NeverDestroyed<String> failureReasonDuplicateMode(MAKE_STATIC_STRING_IMPL("duplicate mode directive"));
397	static NeverDestroyed<String> failureReasonDuplicateReport(MAKE_STATIC_STRING_IMPL("duplicate report directive"));
398	static NeverDestroyed<String> failureReasonInvalidDirective(MAKE_STATIC_STRING_IMPL("unrecognized directive"));
399
400	unsigned pos = `0`;
401
402	if (!skipWhiteSpace(header, pos))
403	return XSSProtectionDisposition::Enabled;
404
405	if (header [pos] == `'0'`)
406	return XSSProtectionDisposition::Disabled;
407
408	if (header [pos++] != `'1'`) {
409	failureReason = failureReasonInvalidToggle;
410	return XSSProtectionDisposition::Invalid;
411	}
412
413	XSSProtectionDisposition result = XSSProtectionDisposition::Enabled;
414	bool modeDirectiveSeen = false;
415	bool reportDirectiveSeen = false;
416
417	while (`1`) {
418	// At end of previous directive: consume whitespace, semicolon, and whitespace.
419	if (!skipWhiteSpace(header, pos))
420	return result;
421
422	if (header [pos++] != `';'`) {
423	failureReason = failureReasonInvalidSeparator;
424	failurePosition = pos;
425	return XSSProtectionDisposition::Invalid;
426	}
427
428	if (!skipWhiteSpace(header, pos))
429	return result;
430
431	// At start of next directive.
432	if (skipToken(header, pos, "mode")) {
433	if (modeDirectiveSeen) {
434	failureReason = failureReasonDuplicateMode;
435	failurePosition = pos;
436	return XSSProtectionDisposition::Invalid;
437	}
438	modeDirectiveSeen = true;
439	if (!skipEquals(header, pos)) {
440	failureReason = failureReasonInvalidEquals;
441	failurePosition = pos;
442	return XSSProtectionDisposition::Invalid;
443	}
444	if (!skipToken(header, pos, "block")) {
445	failureReason = failureReasonInvalidMode;
446	failurePosition = pos;
447	return XSSProtectionDisposition::Invalid;
448	}
449	result = XSSProtectionDisposition::BlockEnabled;
450	} else if (skipToken(header, pos, "report")) {
451	if (reportDirectiveSeen) {
452	failureReason = failureReasonDuplicateReport;
453	failurePosition = pos;
454	return XSSProtectionDisposition::Invalid;
455	}
456	reportDirectiveSeen = true;
457	if (!skipEquals(header, pos)) {
458	failureReason = failureReasonInvalidEquals;
459	failurePosition = pos;
460	return XSSProtectionDisposition::Invalid;
461	}
462	size_t startPos = pos;
463	if (!skipValue(header, pos)) {
464	failureReason = failureReasonInvalidReport;
465	failurePosition = pos;
466	return XSSProtectionDisposition::Invalid;
467	}
468	reportURL = header.substring(startPos, pos - startPos);
469	failurePosition = startPos; // If later semantic check deems unacceptable.
470	} else {
471	failureReason = failureReasonInvalidDirective;
472	failurePosition = pos;
473	return XSSProtectionDisposition::Invalid;
474	}
475	}
476	}
477
478	ContentTypeOptionsDisposition parseContentTypeOptionsHeader(StringView header)
479	{
480	StringView leftToken = header.left(header.find(`','`));
481	if (equalLettersIgnoringASCIICase(stripLeadingAndTrailingHTTPSpaces(leftToken), "nosniff"))
482	return ContentTypeOptionsNosniff;
483	return ContentTypeOptionsNone;
484	}
485
486	// For example: "HTTP/1.1 200 OK" => "OK".
487	// Note that HTTP/2 does not include a reason phrase, so we return the empty atom.
488	AtomicString extractReasonPhraseFromHTTPStatusLine(const String& statusLine)
489	{
490	StringView view = statusLine;
491	size_t spacePos = view.find(`' '`);
492
493	// Remove status code from the status line.
494	spacePos = view.find(`' '`, spacePos + `1`);
495	if (spacePos == notFound)
496	return emptyAtom();
497
498	return view.substring(spacePos + `1`).toAtomicString();
499	}
500
501	XFrameOptionsDisposition parseXFrameOptionsHeader(const String& header)
502	{
503	XFrameOptionsDisposition result = XFrameOptionsNone;
504
505	if (header.isEmpty())
506	return result;
507
508	for (auto& currentHeader : header.split(`','`)) {
509	currentHeader = currentHeader.stripWhiteSpace();
510	XFrameOptionsDisposition currentValue = XFrameOptionsNone;
511	if (equalLettersIgnoringASCIICase(currentHeader, "deny"))
512	currentValue = XFrameOptionsDeny;
513	else if (equalLettersIgnoringASCIICase(currentHeader, "sameorigin"))
514	currentValue = XFrameOptionsSameOrigin;
515	else if (equalLettersIgnoringASCIICase(currentHeader, "allowall"))
516	currentValue = XFrameOptionsAllowAll;
517	else
518	currentValue = XFrameOptionsInvalid;
519
520	if (result == XFrameOptionsNone)
521	result = currentValue;
522	else if (result != currentValue)
523	return XFrameOptionsConflict;
524	}
525	return result;
526	}
527
528	bool parseRange(const String& range, long long& rangeOffset, long long& rangeEnd, long long& rangeSuffixLength)
529	{
530	// The format of "Range" header is defined in RFC 2616 Section 14.35.1.
531	// http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35.1
532	// We don't support multiple range requests.
533
534	rangeOffset = rangeEnd = rangeSuffixLength = -`1`;
535
536	// The "bytes" unit identifier should be present.
537	static const unsigned bytesLength = `6`;
538	if (!startsWithLettersIgnoringASCIICase(range, "bytes="))
539	return false;
540	// FIXME: The rest of this should use StringView.
541	String byteRange = range.substring(bytesLength);
542
543	// The '-' character needs to be present.
544	int index = byteRange.find(`'-'`);
545	if (index == -`1`)
546	return false;
547
548	// If the '-' character is at the beginning, the suffix length, which specifies the last N bytes, is provided.
549	// Example:
550	// -500
551	if (!index) {
552	String suffixLengthString = byteRange.substring(index + `1`).stripWhiteSpace();
553	bool ok;
554	long long value = suffixLengthString.toInt64Strict(&ok);
555	if (ok)
556	rangeSuffixLength = value;
557	return true;
558	}
559
560	// Otherwise, the first-byte-position and the last-byte-position are provied.
561	// Examples:
562	// 0-499
563	// 500-
564	String firstBytePosStr = byteRange.left(index).stripWhiteSpace();
565	bool ok;
566	long long firstBytePos = firstBytePosStr.toInt64Strict(&ok);
567	if (!ok)
568	return false;
569
570	String lastBytePosStr = byteRange.substring(index + `1`).stripWhiteSpace();
571	long long lastBytePos = -`1`;
572	if (!lastBytePosStr.isEmpty()) {
573	lastBytePos = lastBytePosStr.toInt64Strict(&ok);
574	if (!ok)
575	return false;
576	}
577
578	if (firstBytePos < `0` \|\| !(lastBytePos == -`1` \|\| lastBytePos >= firstBytePos))
579	return false;
580
581	rangeOffset = firstBytePos;
582	rangeEnd = lastBytePos;
583	return true;
584	}
585
586	// HTTP/1.1 - RFC 2616
587	// http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1
588	// Request-Line = Method SP Request-URI SP HTTP-Version CRLF
589	size_t parseHTTPRequestLine(const char* data, size_t length, String& failureReason, String& method, String& url, HTTPVersion& httpVersion)
590	{
591	method = String ();
592	url = String ();
593	httpVersion = Unknown;
594
595	const char* space1 = `0`;
596	const char* space2 = `0`;
597	const char* p;
598	size_t consumedLength;
599
600	for (p = data, consumedLength = `0`; consumedLength < length; p++, consumedLength++) {
601	if (*p == `' '`) {
602	if (!space1)
603	space1 = p;
604	else if (!space2)
605	space2 = p;
606	} else if (*p == `'\n'`)
607	break;
608	}
609
610	// Haven't finished header line.
611	if (consumedLength == length) {
612	failureReason = "Incomplete Request Line"_s;
613	return `0`;
614	}
615
616	// RequestLine does not contain 3 parts.
617	if (!space1 \|\| !space2) {
618	failureReason = "Request Line does not appear to contain: <Method> <Url> <HTTPVersion>."_s;
619	return `0`;
620	}
621
622	// The line must end with "\r\n".
623	const char* end = p + `1`;
624	if (*(end - `2`) != `'\r'`) {
625	failureReason = "Request line does not end with CRLF"_s;
626	return `0`;
627	}
628
629	// Request Method.
630	method = String (data, space1 - data); // For length subtract 1 for space, but add 1 for data being the first character.
631
632	// Request URI.
633	url = String (space1 + `1`, space2 - space1 - `1`); // For length subtract 1 for space.
634
635	// HTTP Version.
636	String httpVersionString(space2 + `1`, end - space2 - `3`); // For length subtract 1 for space, and 2 for "\r\n".
637	if (httpVersionString.length() != `8` \|\| !httpVersionString.startsWith("HTTP/1."))
638	httpVersion = Unknown;
639	else if (httpVersionString [`7`] == `'0'`)
640	httpVersion = HTTP_1_0;
641	else if (httpVersionString [`7`] == `'1'`)
642	httpVersion = HTTP_1_1;
643	else
644	httpVersion = Unknown;
645
646	return end - data;
647	}
648
649	static inline bool isValidHeaderNameCharacter(const char* character)
650	{
651	// https://tools.ietf.org/html/rfc7230#section-3.2
652	// A header name should only contain one or more of
653	// alphanumeric or ! # $ % & ' + - . ^ _ ` \| ~*
654	if (isASCIIAlphanumeric(*character))
655	return true;
656	switch (*character) {
657	case `'!'`:
658	case `'#'`:
659	case `'$'`:
660	case `'%'`:
661	case `'&'`:
662	case `'\''`:
663	case `'*'`:
664	case `'+'`:
665	case `'-'`:
666	case `'.'`:
667	case `'^'`:
668	case `'_'`:
669	case '`':
670	case `'\|'`:
671	case `'~'`:
672	return true;
673	default:
674	return false;
675	}
676	}
677
678	size_t parseHTTPHeader(const char* start, size_t length, String& failureReason, StringView& nameStr, String& valueStr, bool strict)
679	{
680	const char* p = start;
681	const char* end = start + length;
682
683	Vector<char> name;
684	Vector<char> value;
685
686	bool foundFirstNameChar = false;
687	const char* namePtr = nullptr;
688	size_t nameSize = `0`;
689
690	nameStr = StringView ();
691	valueStr = String ();
692
693	for (; p < end; p++) {
694	switch (*p) {
695	case `'\r'`:
696	if (name.isEmpty()) {
697	if (p + `1` < end && *(p + `1`) == `'\n'`)
698	return (p + `2`) - start;
699	failureReason = makeString("CR doesn't follow LF in header name at ", trimInputSample(p, end - p));
700	return `0`;
701	}
702	failureReason = makeString("Unexpected CR in header name at ", trimInputSample(name.data(), name.size()));
703	return `0`;
704	case `'\n'`:
705	failureReason = makeString("Unexpected LF in header name at ", trimInputSample(name.data(), name.size()));
706	return `0`;
707	case `':'`:
708	break;
709	default:
710	if (!isValidHeaderNameCharacter(p)) {
711	if (name.size() < `1`)
712	failureReason = "Unexpected start character in header name";
713	else
714	failureReason = makeString("Unexpected character in header name at ", trimInputSample(name.data(), name.size()));
715	return `0`;
716	}
717	name.append(*p);
718	if (!foundFirstNameChar) {
719	namePtr = p;
720	foundFirstNameChar = true;
721	}
722	continue;
723	}
724	if (*p == `':'`) {
725	++p;
726	break;
727	}
728	}
729
730	nameSize = name.size();
731	nameStr = StringView (reinterpret_cast<const LChar*>(namePtr), nameSize);
732
733	for (; p < end && *p == `0x20`; p++) { }
734
735	for (; p < end; p++) {
736	switch (*p) {
737	case `'\r'`:
738	break;
739	case `'\n'`:
740	if (strict) {
741	failureReason = makeString("Unexpected LF in header value at ", trimInputSample(value.data(), value.size()));
742	return `0`;
743	}
744	break;
745	default:
746	value.append(*p);
747	}
748	if (p == `'\r'` \|\| (!strict && p == `'\n'`)) {
749	++p;
750	break;
751	}
752	}
753	if (p >= end \|\| (strict && *p != `'\n'`)) {
754	failureReason = makeString("CR doesn't follow LF after header value at ", trimInputSample(p, end - p));
755	return `0`;
756	}
757	valueStr = String::fromUTF8(value.data(), value.size());
758	if (valueStr.isNull()) {
759	failureReason = "Invalid UTF-8 sequence in header value"_s;
760	return `0`;
761	}
762	return p - start;
763	}
764
765	size_t parseHTTPRequestBody(const char* data, size_t length, Vector<unsigned char>& body)
766	{
767	body.clear();
768	body.append(data, length);
769
770	return length;
771	}
772
773	// Implements <https://fetch.spec.whatwg.org/#forbidden-header-name>.
774	bool isForbiddenHeaderName(const String& name)
775	{
776	HTTPHeaderName headerName;
777	if (findHTTPHeaderName(name, headerName)) {
778	switch (headerName) {
779	case HTTPHeaderName::AcceptCharset:
780	case HTTPHeaderName::AcceptEncoding:
781	case HTTPHeaderName::AccessControlRequestHeaders:
782	case HTTPHeaderName::AccessControlRequestMethod:
783	case HTTPHeaderName::Connection:
784	case HTTPHeaderName::ContentLength:
785	case HTTPHeaderName::Cookie:
786	case HTTPHeaderName::Cookie2:
787	case HTTPHeaderName::Date:
788	case HTTPHeaderName::DNT:
789	case HTTPHeaderName::Expect:
790	case HTTPHeaderName::Host:
791	case HTTPHeaderName::KeepAlive:
792	case HTTPHeaderName::Origin:
793	case HTTPHeaderName::Referer:
794	case HTTPHeaderName::TE:
795	case HTTPHeaderName::Trailer:
796	case HTTPHeaderName::TransferEncoding:
797	case HTTPHeaderName::Upgrade:
798	case HTTPHeaderName::Via:
799	return true;
800	default:
801	break;
802	}
803	}
804	return startsWithLettersIgnoringASCIICase(name, "sec-") \|\| startsWithLettersIgnoringASCIICase(name, "proxy-");
805	}
806
807	// Implements <https://fetch.spec.whatwg.org/#forbidden-response-header-name>.
808	bool isForbiddenResponseHeaderName(const String& name)
809	{
810	return equalLettersIgnoringASCIICase(name, "set-cookie") \|\| equalLettersIgnoringASCIICase(name, "set-cookie2");
811	}
812
813	// Implements <https://fetch.spec.whatwg.org/#forbidden-method>.
814	bool isForbiddenMethod(const String& name)
815	{
816	return equalLettersIgnoringASCIICase(name, "connect") \|\| equalLettersIgnoringASCIICase(name, "trace") \|\| equalLettersIgnoringASCIICase(name, "track");
817	}
818
819	bool isSimpleHeader(const String& name, const String& value)
820	{
821	HTTPHeaderName headerName;
822	if (!findHTTPHeaderName(name, headerName))
823	return false;
824	return isCrossOriginSafeRequestHeader(headerName, value);
825	}
826
827	bool isCrossOriginSafeHeader(HTTPHeaderName name, const HTTPHeaderSet& accessControlExposeHeaderSet)
828	{
829	switch (name) {
830	case HTTPHeaderName::CacheControl:
831	case HTTPHeaderName::ContentLanguage:
832	case HTTPHeaderName::ContentLength:
833	case HTTPHeaderName::ContentType:
834	case HTTPHeaderName::Expires:
835	case HTTPHeaderName::LastModified:
836	case HTTPHeaderName::Pragma:
837	case HTTPHeaderName::Accept:
838	return true;
839	case HTTPHeaderName::SetCookie:
840	case HTTPHeaderName::SetCookie2:
841	return false;
842	default:
843	break;
844	}
845	return accessControlExposeHeaderSet.contains(httpHeaderNameString(name).toStringWithoutCopying());
846	}
847
848	bool isCrossOriginSafeHeader(const String& name, const HTTPHeaderSet& accessControlExposeHeaderSet)
849	{
850	#ifndef ASSERT_DISABLED
851	HTTPHeaderName headerName;
852	ASSERT(!findHTTPHeaderName(name, headerName));
853	#endif
854	return accessControlExposeHeaderSet.contains(name);
855	}
856
857	// Implements https://fetch.spec.whatwg.org/#cors-safelisted-request-header
858	bool isCrossOriginSafeRequestHeader(HTTPHeaderName name, const String& value)
859	{
860	switch (name) {
861	case HTTPHeaderName::Accept:
862	if (!isValidAcceptHeaderValue(value))
863	return false;
864	break;
865	case HTTPHeaderName::AcceptLanguage:
866	case HTTPHeaderName::ContentLanguage:
867	if (!isValidLanguageHeaderValue(value))
868	return false;
869	break;
870	case HTTPHeaderName::ContentType: {
871	// Preflight is required for MIME types that can not be sent via form submission.
872	String mimeType = extractMIMETypeFromMediaType(value);
873	if (!(equalLettersIgnoringASCIICase(mimeType, "application/x-www-form-urlencoded") \|\| equalLettersIgnoringASCIICase(mimeType, "multipart/form-data") \|\| equalLettersIgnoringASCIICase(mimeType, "text/plain")))
874	return false;
875	break;
876	}
877	default:
878	// FIXME: Should we also make safe other headers (DPR, Downlink, Save-Data...)? That would require validating their values.
879	return false;
880	}
881	return value.length() <= `128`;
882	}
883
884	// Implements <https://fetch.spec.whatwg.org/#concept-method-normalize>.
885	String normalizeHTTPMethod(const String& method)
886	{
887	const ASCIILiteral methods[] = { "DELETE"_s, "GET"_s, "HEAD"_s, "OPTIONS"_s, "POST"_s, "PUT"_s };
888	for (auto value : methods) {
889	if (equalIgnoringASCIICase(method, value.characters())) {
890	// Don't bother allocating a new string if it's already all uppercase.
891	if (method == value)
892	break;
893	return value;
894	}
895	}
896	return method;
897	}
898
899	CrossOriginResourcePolicy parseCrossOriginResourcePolicyHeader(StringView header)
900	{
901	auto strippedHeader = stripLeadingAndTrailingHTTPSpaces(header);
902
903	if (strippedHeader.isEmpty())
904	return CrossOriginResourcePolicy::None;
905
906	if (strippedHeader == "same-origin")
907	return CrossOriginResourcePolicy::SameOrigin;
908
909	if (strippedHeader == "same-site")
910	return CrossOriginResourcePolicy::SameSite;
911
912	return CrossOriginResourcePolicy::Invalid;
913	}
914
915	}
916

Browse the source code of webkit/Source/WebCore/platform/network/HTTPParsers.cpp