ASCIICType.h source code [webkit/WebKitBuild/Debug/DerivedSources/ForwardingHeaders/wtf/ASCIICType.h]

1	/*
2	* Copyright (C) 2007-2019 Apple Inc. All rights reserved.
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions
6	* are met:
7	* 1. Redistributions of source code must retain the above copyright
8	* notice, this list of conditions and the following disclaimer.
9	* 2. Redistributions in binary form must reproduce the above copyright
10	* notice, this list of conditions and the following disclaimer in the
11	* documentation and/or other materials provided with the distribution.
12	*
13	* THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
14	* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15	* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16	* DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
17	* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18	* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
19	* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
20	* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
22	* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23	*/
24
25	#pragma once
26
27	#include <wtf/Assertions.h>
28	#include <wtf/text/LChar.h>
29
30	// The behavior of many of the functions in the <ctype.h> header is dependent
31	// on the current locale. But in the WebKit project, all uses of those functions
32	// are in code processing something that's not locale-specific. These equivalents
33	// for some of the <ctype.h> functions are named more explicitly, not dependent
34	// on the C library locale, and we should also optimize them as needed.
35
36	// All functions return false or leave the character unchanged if passed a character
37	// that is outside the range 0-7F. So they can be used on Unicode strings or
38	// characters if the intent is to do processing only if the character is ASCII.
39
40	namespace WTF {
41
// Forward declarations of everything this header defines, grouped by purpose.

// Classification predicates.
template<typename CharacterType> bool isASCII(CharacterType);
template<typename CharacterType> bool isASCIIAlpha(CharacterType);
template<typename CharacterType> bool isASCIIAlphanumeric(CharacterType);
template<typename CharacterType> bool isASCIIBinaryDigit(CharacterType);
template<typename CharacterType> bool isASCIIDigit(CharacterType);
template<typename CharacterType> bool isASCIIDigitOrPunctuation(CharacterType);
template<typename CharacterType> bool isASCIIHexDigit(CharacterType);
template<typename CharacterType> bool isASCIILower(CharacterType);
template<typename CharacterType> bool isASCIIOctalDigit(CharacterType);
template<typename CharacterType> bool isASCIIPrintable(CharacterType);
template<typename CharacterType> bool isASCIISpace(CharacterType);
template<typename CharacterType> bool isASCIIUpper(CharacterType);

// Case conversion.
template<typename CharacterType> CharacterType toASCIILower(CharacterType);
template<typename CharacterType> CharacterType toASCIIUpper(CharacterType);

// Hex digit(s) to numeric value; the two-argument form combines two digits into one byte.
template<typename CharacterType> uint8_t toASCIIHexValue(CharacterType);
template<typename CharacterType> uint8_t toASCIIHexValue(CharacterType firstCharacter, CharacterType secondCharacter);

// One nibble of a byte to its hex digit character.
char lowerNibbleToASCIIHexDigit(uint8_t);
char upperNibbleToASCIIHexDigit(uint8_t);
char lowerNibbleToLowercaseASCIIHexDigit(uint8_t);
char upperNibbleToLowercaseASCIIHexDigit(uint8_t);

template<typename CharacterType> bool isASCIIAlphaCaselessEqual(CharacterType, char expectedASCIILowercaseLetter);

// The toASCIILowerUnchecked function can be used for comparing any input character
// to a lowercase English character. The isASCIIAlphaCaselessEqual function should
// be used for regular comparison of ASCII alpha characters, but switch statements
// in the CSS tokenizer, for example, instead make direct use of toASCIILowerUnchecked.
template<typename CharacterType> CharacterType toASCIILowerUnchecked(CharacterType);
72
73	extern WTF_EXPORT_PRIVATE const unsigned char asciiCaseFoldTable[`256`];
74
// Returns true when the character has no bit set above the low seven, i.e. it lies in 0x00-0x7F.
template<typename CharacterType> inline bool isASCII(CharacterType character)
{
    constexpr int bitsAboveASCII = ~0x7F;
    return (character & bitsAboveASCII) == 0;
}
79
// Returns true only for the lowercase letters 'a' through 'z'.
template<typename CharacterType> inline bool isASCIILower(CharacterType character)
{
    if (character >= 'a')
        return character <= 'z';
    return false;
}
84
// Unconditionally sets bit 0x20 of the character, which maps 'A'-'Z' onto 'a'-'z'.
// No check is made that the input is a letter, so other characters lacking that
// bit are changed too (for example '@' becomes '`').
//
// Useful for comparing arbitrary input against a lowercase English letter.
// isASCIIAlphaCaselessEqual is the regular way to compare ASCII alpha characters,
// but switch statements in the CSS tokenizer use this function directly.
template<typename CharacterType> inline CharacterType toASCIILowerUnchecked(CharacterType character)
{
    constexpr int lowercaseBit = 0x20;
    return character | lowercaseBit;
}
94
// Returns true for 'A'-'Z' and 'a'-'z'.
template<typename CharacterType> inline bool isASCIIAlpha(CharacterType character)
{
    // Folding case first lets one lowercase range check cover both cases.
    const CharacterType folded = toASCIILowerUnchecked(character);
    return isASCIILower(folded);
}
99
// Returns true only for the decimal digits '0' through '9'.
template<typename CharacterType> inline bool isASCIIDigit(CharacterType character)
{
    const bool atLeastZero = character >= '0';
    const bool atMostNine = character <= '9';
    return atLeastZero && atMostNine;
}
104
// Returns true for ASCII decimal digits and ASCII letters of either case.
template<typename CharacterType> inline bool isASCIIAlphanumeric(CharacterType character)
{
    if (isASCIIDigit(character))
        return true;
    return isASCIIAlpha(character);
}
109
// Returns true for '0'-'9', 'a'-'f' and 'A'-'F'.
template<typename CharacterType> inline bool isASCIIHexDigit(CharacterType character)
{
    if (isASCIIDigit(character))
        return true;

    // Fold case once, then a single 'a'-'f' range check handles both cases.
    const CharacterType folded = toASCIILowerUnchecked(character);
    return folded >= 'a' && folded <= 'f';
}
114
// Returns true only for the binary digits '0' and '1'.
template<typename CharacterType> inline bool isASCIIBinaryDigit(CharacterType character)
{
    if (character == '0')
        return true;
    return character == '1';
}
119
// Returns true only for the octal digits '0' through '7'.
template<typename CharacterType> inline bool isASCIIOctalDigit(CharacterType character)
{
    return character >= '0' ? character <= '7' : false;
}
124
// Returns true for the characters from space (' ') through tilde ('~'), inclusive.
template<typename CharacterType> inline bool isASCIIPrintable(CharacterType character)
{
    const bool atOrAboveSpace = character >= ' ';
    const bool atOrBelowTilde = character <= '~';
    return atOrAboveSpace && atOrBelowTilde;
}
129
/*
    Returns true for space (0x20) and for the control characters 0x09-0x0D
    (\t, \n, \v, \f, \r).

    The order of the checks follows statistics gathered from a run of Apple's
    page load test for callers of isASCIISpace:

        character       count
        ---------       -----
        non-spaces      689383
        20  space       294720
        0A  \n          89059
        09  \t          28320
        0D  \r          0
        0C  \f          0
        0B  \v          0

    Accordingly, anything above ' ' is rejected first with a single comparison,
    then space itself is accepted, and only then is the 0x09-0x0D range tested.
*/
template<typename CharacterType> inline bool isASCIISpace(CharacterType character)
{
    if (character <= ' ') {
        if (character == ' ')
            return true;
        return character <= 0xD && character >= 0x9;
    }
    return false;
}
150
// Returns true only for the uppercase letters 'A' through 'Z'.
template<typename CharacterType> inline bool isASCIIUpper(CharacterType character)
{
    if (character >= 'A')
        return character <= 'Z';
    return false;
}
155
// Converts 'A'-'Z' to 'a'-'z' and returns every other character unchanged.
template<typename CharacterType> inline CharacterType toASCIILower(CharacterType character)
{
    // isASCIIUpper() yields 0 or 1; shifted left by five it becomes the 0x20
    // case bit for uppercase letters and zero for everything else.
    const int caseBit = isASCIIUpper(character) << 5;
    return character | caseBit;
}
160
161	template<> inline char toASCIILower(char character)
162	{
163	return static_cast<char>(asciiCaseFoldTable[static_cast<uint8_t>(character)]);
164	}
165
166	template<> inline LChar toASCIILower(LChar character)
167	{
168	return asciiCaseFoldTable[character];
169	}
170
// Converts 'a'-'z' to 'A'-'Z' and returns every other character unchanged.
template<typename CharacterType> inline CharacterType toASCIIUpper(CharacterType character)
{
    // isASCIILower() yields 0 or 1; shifted left by five it becomes the 0x20
    // case bit, which is cleared only when the character is a lowercase letter.
    const int caseBit = isASCIILower(character) << 5;
    return character & ~caseBit;
}
175
// Returns the numeric value (0-15) of a hex digit character.
// The character must be a hex digit; this is asserted.
template<typename CharacterType> inline uint8_t toASCIIHexValue(CharacterType character)
{
    ASSERT(isASCIIHexDigit(character));

    // Among hex digits, only '0'-'9' sort below 'A'.
    if (character < 'A')
        return character - '0';

    // For 'A'-'F' the sum is already 10-15. For 'a'-'f' it is 0x20 higher,
    // and masking with 0xF removes the excess.
    return (character - 'A' + 10) & 0xF;
}
181
// Combines two hex digit characters into one byte: the first supplies the
// high nibble and the second supplies the low nibble.
template<typename CharacterType> inline uint8_t toASCIIHexValue(CharacterType firstCharacter, CharacterType secondCharacter)
{
    const uint8_t highNibble = toASCIIHexValue(firstCharacter);
    const uint8_t lowNibble = toASCIIHexValue(secondCharacter);
    return highNibble << 4 | lowNibble;
}
186
// Returns the uppercase hex digit ('0'-'9', 'A'-'F') for the low four bits of value.
inline char lowerNibbleToASCIIHexDigit(uint8_t value)
{
    const uint8_t nibble = value & 0xF;
    if (nibble < 10)
        return nibble + '0';
    return nibble + ('A' - 10);
}
192
// Returns the uppercase hex digit ('0'-'9', 'A'-'F') for the high four bits of value.
inline char upperNibbleToASCIIHexDigit(uint8_t value)
{
    const uint8_t nibble = value >> 4;
    // Values 0-9 are offset from '0'; values 10-15 are offset so that 10 lands on 'A'.
    const int digitBase = nibble < 10 ? '0' : 'A' - 10;
    return nibble + digitBase;
}
198
// Returns the lowercase hex digit ('0'-'9', 'a'-'f') for the low four bits of value.
inline char lowerNibbleToLowercaseASCIIHexDigit(uint8_t value)
{
    const uint8_t nibble = value & 0xF;
    if (nibble < 10)
        return nibble + '0';
    return nibble + ('a' - 10);
}
204
// Returns the lowercase hex digit ('0'-'9', 'a'-'f') for the high four bits of value.
inline char upperNibbleToLowercaseASCIIHexDigit(uint8_t value)
{
    const uint8_t nibble = value >> 4;
    // Values 0-9 are offset from '0'; values 10-15 are offset so that 10 lands on 'a'.
    const int digitBase = nibble < 10 ? '0' : 'a' - 10;
    return nibble + digitBase;
}
210
211	template<typename CharacterType> inline bool isASCIIAlphaCaselessEqual(CharacterType inputCharacter, char expectedASCIILowercaseLetter)
212	{
213	// Name of this argument says this must be a lowercase letter, but it can actually be:
214	// - a lowercase letter
215	// - a numeric digit
216	// - a space
217	// - punctuation in the range 0x21-0x3F, including "-", "/", and "+"
218	// It cannot be:
219	// - an uppercase letter
220	// - a non-ASCII character
221	// - other punctuation, such as underscore and backslash
222	// - a control character such as "\n"
223	// FIXME: Would be nice to make both the function name and expectedASCIILowercaseLetter argument name clearer.
224	ASSERT(toASCIILowerUnchecked(expectedASCIILowercaseLetter) == expectedASCIILowercaseLetter);
225	return LIKELY(toASCIILowerUnchecked(inputCharacter) == expectedASCIILowercaseLetter);
226	}
227
// Returns true for every character from '!' through '~' that is not a letter:
// the digits plus the punctuation ranges that surround the two alphabets.
template<typename CharacterType> inline bool isASCIIDigitOrPunctuation(CharacterType charCode)
{
    // '!' through '@': punctuation and the digits, everything below 'A'.
    if (charCode >= '!' && charCode <= '@')
        return true;

    // '[' through '`': the punctuation between 'Z' and 'a'.
    if (charCode >= '[' && charCode <= '`')
        return true;

    // '{' through '~': the punctuation after 'z'.
    return charCode >= '{' && charCode <= '~';
}
232
233	}
234
235	using WTF::isASCII;
236	using WTF::isASCIIAlpha;
237	using WTF::isASCIIAlphaCaselessEqual;
238	using WTF::isASCIIAlphanumeric;
239	using WTF::isASCIIBinaryDigit;
240	using WTF::isASCIIDigit;
241	using WTF::isASCIIDigitOrPunctuation;
242	using WTF::isASCIIHexDigit;
243	using WTF::isASCIILower;
244	using WTF::isASCIIOctalDigit;
245	using WTF::isASCIIPrintable;
246	using WTF::isASCIISpace;
247	using WTF::isASCIIUpper;
248	using WTF::lowerNibbleToASCIIHexDigit;
249	using WTF::lowerNibbleToLowercaseASCIIHexDigit;
250	using WTF::toASCIIHexValue;
251	using WTF::toASCIILower;
252	using WTF::toASCIILowerUnchecked;
253	using WTF::toASCIIUpper;
254	using WTF::upperNibbleToASCIIHexDigit;
255	using WTF::upperNibbleToLowercaseASCIIHexDigit;
256

Browse the source code of webkit/WebKitBuild/Debug/DerivedSources/ForwardingHeaders/wtf/ASCIICType.h