| 1 | /* |
| 2 | * Copyright (C) 2018 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions |
| 6 | * are met: |
| 7 | * 1. Redistributions of source code must retain the above copyright |
| 8 | * notice, this list of conditions and the following disclaimer. |
| 9 | * 2. Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * |
| 13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' |
| 14 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
| 15 | * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS |
| 17 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 18 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 19 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 20 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 21 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 22 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF |
| 23 | * THE POSSIBILITY OF SUCH DAMAGE. |
| 24 | */ |
| 25 | |
| 26 | #include "config.h" |
| 27 | #include "WHLSLLexer.h" |
| 28 | |
| 29 | #if ENABLE(WEBGPU) |
| 30 | |
| 31 | namespace WebCore { |
| 32 | |
| 33 | namespace WHLSL { |
| 34 | |
| 35 | const char* Lexer::Token::typeName(Type type) |
| 36 | { |
| 37 | switch (type) { |
| 38 | case Type::IntLiteral: |
| 39 | return "int literal" ; |
| 40 | case Type::UintLiteral: |
| 41 | return "uint literal" ; |
| 42 | case Type::FloatLiteral: |
| 43 | return "float literal" ; |
| 44 | case Type::Struct: |
| 45 | return "struct" ; |
| 46 | case Type::Typedef: |
| 47 | return "typedef" ; |
| 48 | case Type::Enum: |
| 49 | return "enum" ; |
| 50 | case Type::Operator: |
| 51 | return "operator" ; |
| 52 | case Type::If: |
| 53 | return "if" ; |
| 54 | case Type::Else: |
| 55 | return "else" ; |
| 56 | case Type::Continue: |
| 57 | return "continue" ; |
| 58 | case Type::Break: |
| 59 | return "break" ; |
| 60 | case Type::Switch: |
| 61 | return "switch" ; |
| 62 | case Type::Case: |
| 63 | return "case" ; |
| 64 | case Type::Default: |
| 65 | return "default" ; |
| 66 | case Type::Fallthrough: |
| 67 | return "fallthrough" ; |
| 68 | case Type::For: |
| 69 | return "for" ; |
| 70 | case Type::While: |
| 71 | return "while" ; |
| 72 | case Type::Do: |
| 73 | return "do" ; |
| 74 | case Type::Return: |
| 75 | return "return" ; |
| 76 | case Type::Trap: |
| 77 | return "trap" ; |
| 78 | case Type::Null: |
| 79 | return "null" ; |
| 80 | case Type::True: |
| 81 | return "true" ; |
| 82 | case Type::False: |
| 83 | return "false" ; |
| 84 | case Type::Constant: |
| 85 | return "constant" ; |
| 86 | case Type::Device: |
| 87 | return "device" ; |
| 88 | case Type::Threadgroup: |
| 89 | return "threadgroup" ; |
| 90 | case Type::Thread: |
| 91 | return "thread" ; |
| 92 | case Type::Space: |
| 93 | return "space" ; |
| 94 | case Type::Vertex: |
| 95 | return "vertex" ; |
| 96 | case Type::Fragment: |
| 97 | return "fragment" ; |
| 98 | case Type::Compute: |
| 99 | return "compute" ; |
| 100 | case Type::NumThreads: |
| 101 | return "numthreads" ; |
| 102 | case Type::SVInstanceID: |
| 103 | return "SV_InstanceID" ; |
| 104 | case Type::SVVertexID: |
| 105 | return "SV_VertexID" ; |
| 106 | case Type::PSize: |
| 107 | return "PSIZE" ; |
| 108 | case Type::SVPosition: |
| 109 | return "SV_Position" ; |
| 110 | case Type::SVIsFrontFace: |
| 111 | return "SV_IsFrontFace" ; |
| 112 | case Type::SVSampleIndex: |
| 113 | return "SV_SampleIndex" ; |
| 114 | case Type::SVInnerCoverage: |
| 115 | return "SV_InnerCoverage" ; |
| 116 | case Type::SVTarget: |
| 117 | return "SV_Target" ; |
| 118 | case Type::SVDepth: |
| 119 | return "SV_Depth" ; |
| 120 | case Type::SVCoverage: |
| 121 | return "SV_Coverage" ; |
| 122 | case Type::SVDispatchThreadID: |
| 123 | return "SV_DispatchThreadID" ; |
| 124 | case Type::SVGroupID: |
| 125 | return "SV_GroupID" ; |
| 126 | case Type::SVGroupIndex: |
| 127 | return "SV_GroupIndex" ; |
| 128 | case Type::SVGroupThreadID: |
| 129 | return "SV_GroupThreadID" ; |
| 130 | case Type::Attribute: |
| 131 | return "SV_Attribute" ; |
| 132 | case Type::Register: |
| 133 | return "register" ; |
| 134 | case Type::Specialized: |
| 135 | return "specialized" ; |
| 136 | case Type::Native: |
| 137 | return "native" ; |
| 138 | case Type::Restricted: |
| 139 | return "restricted" ; |
| 140 | case Type::Underscore: |
| 141 | return "_" ; |
| 142 | case Type::Auto: |
| 143 | return "auto" ; |
| 144 | case Type::Protocol: |
| 145 | return "protocol" ; |
| 146 | case Type::Const: |
| 147 | return "const" ; |
| 148 | case Type::Static: |
| 149 | return "static" ; |
| 150 | case Type::Qualifier: |
| 151 | return "qualifier" ; |
| 152 | case Type::Identifier: |
| 153 | return "identifier" ; |
| 154 | case Type::OperatorName: |
| 155 | return "operator name" ; |
| 156 | case Type::EqualsSign: |
| 157 | return "=" ; |
| 158 | case Type::Semicolon: |
| 159 | return ";" ; |
| 160 | case Type::LeftCurlyBracket: |
| 161 | return "{" ; |
| 162 | case Type::RightCurlyBracket: |
| 163 | return "}" ; |
| 164 | case Type::Colon: |
| 165 | return ":" ; |
| 166 | case Type::Comma: |
| 167 | return "," ; |
| 168 | case Type::LeftParenthesis: |
| 169 | return "(" ; |
| 170 | case Type::RightParenthesis: |
| 171 | return ")" ; |
| 172 | case Type::SquareBracketPair: |
| 173 | return "[]" ; |
| 174 | case Type::LeftSquareBracket: |
| 175 | return "[" ; |
| 176 | case Type::RightSquareBracket: |
| 177 | return "]" ; |
| 178 | case Type::Star: |
| 179 | return "*" ; |
| 180 | case Type::LessThanSign: |
| 181 | return "<" ; |
| 182 | case Type::GreaterThanSign: |
| 183 | return ">" ; |
| 184 | case Type::FullStop: |
| 185 | return "." ; |
| 186 | case Type::PlusEquals: |
| 187 | return "+=" ; |
| 188 | case Type::MinusEquals: |
| 189 | return "-=" ; |
| 190 | case Type::TimesEquals: |
| 191 | return "*=" ; |
| 192 | case Type::DivideEquals: |
| 193 | return "/=" ; |
| 194 | case Type::ModEquals: |
| 195 | return "%=" ; |
| 196 | case Type::XorEquals: |
| 197 | return "^=" ; |
| 198 | case Type::AndEquals: |
| 199 | return "&=" ; |
| 200 | case Type::OrEquals: |
| 201 | return "|=" ; |
| 202 | case Type::RightShiftEquals: |
| 203 | return ">>=" ; |
| 204 | case Type::LeftShiftEquals: |
| 205 | return "<<=" ; |
| 206 | case Type::PlusPlus: |
| 207 | return "++" ; |
| 208 | case Type::MinusMinus: |
| 209 | return "--" ; |
| 210 | case Type::Arrow: |
| 211 | return "->" ; |
| 212 | case Type::QuestionMark: |
| 213 | return "?" ; |
| 214 | case Type::OrOr: |
| 215 | return "||" ; |
| 216 | case Type::AndAnd: |
| 217 | return "&&" ; |
| 218 | case Type::Or: |
| 219 | return "|" ; |
| 220 | case Type::Xor: |
| 221 | return "^" ; |
| 222 | case Type::And: |
| 223 | return "&" ; |
| 224 | case Type::LessThanOrEqualTo: |
| 225 | return "<=" ; |
| 226 | case Type::GreaterThanOrEqualTo: |
| 227 | return ">=" ; |
| 228 | case Type::EqualComparison: |
| 229 | return "==" ; |
| 230 | case Type::NotEqual: |
| 231 | return "!=" ; |
| 232 | case Type::RightShift: |
| 233 | return ">>" ; |
| 234 | case Type::LeftShift: |
| 235 | return "<<" ; |
| 236 | case Type::Plus: |
| 237 | return "+" ; |
| 238 | case Type::Minus: |
| 239 | return "-" ; |
| 240 | case Type::Divide: |
| 241 | return "/" ; |
| 242 | case Type::Mod: |
| 243 | return "%" ; |
| 244 | case Type::Tilde: |
| 245 | return "~" ; |
| 246 | case Type::ExclamationPoint: |
| 247 | return "!" ; |
| 248 | case Type::At: |
| 249 | return "@" ; |
| 250 | } |
| 251 | } |
| 252 | |
| 253 | auto Lexer::recognizeKeyword(unsigned end) -> Optional<Token::Type> |
| 254 | { |
| 255 | auto substring = m_stringView.substring(m_offset, end - m_offset); |
| 256 | if (substring == "struct" ) |
| 257 | return Token::Type::Struct; |
| 258 | if (substring == "typedef" ) |
| 259 | return Token::Type::Typedef; |
| 260 | if (substring == "enum" ) |
| 261 | return Token::Type::Enum; |
| 262 | if (substring == "operator" ) |
| 263 | return Token::Type::Operator; |
| 264 | if (substring == "if" ) |
| 265 | return Token::Type::If; |
| 266 | if (substring == "else" ) |
| 267 | return Token::Type::Else; |
| 268 | if (substring == "continue" ) |
| 269 | return Token::Type::Continue; |
| 270 | if (substring == "break" ) |
| 271 | return Token::Type::Break; |
| 272 | if (substring == "switch" ) |
| 273 | return Token::Type::Switch; |
| 274 | if (substring == "case" ) |
| 275 | return Token::Type::Case; |
| 276 | if (substring == "default" ) |
| 277 | return Token::Type::Default; |
| 278 | if (substring == "fallthrough" ) |
| 279 | return Token::Type::Fallthrough; |
| 280 | if (substring == "for" ) |
| 281 | return Token::Type::For; |
| 282 | if (substring == "while" ) |
| 283 | return Token::Type::While; |
| 284 | if (substring == "do" ) |
| 285 | return Token::Type::Do; |
| 286 | if (substring == "return" ) |
| 287 | return Token::Type::Return; |
| 288 | if (substring == "trap" ) |
| 289 | return Token::Type::Trap; |
| 290 | if (substring == "null" ) |
| 291 | return Token::Type::Null; |
| 292 | if (substring == "true" ) |
| 293 | return Token::Type::True; |
| 294 | if (substring == "false" ) |
| 295 | return Token::Type::False; |
| 296 | if (substring == "constant" ) |
| 297 | return Token::Type::Constant; |
| 298 | if (substring == "device" ) |
| 299 | return Token::Type::Device; |
| 300 | if (substring == "threadgroup" ) |
| 301 | return Token::Type::Threadgroup; |
| 302 | if (substring == "thread" ) |
| 303 | return Token::Type::Thread; |
| 304 | if (substring == "space" ) |
| 305 | return Token::Type::Space; |
| 306 | if (substring == "vertex" ) |
| 307 | return Token::Type::Vertex; |
| 308 | if (substring == "fragment" ) |
| 309 | return Token::Type::Fragment; |
| 310 | if (substring == "compute" ) |
| 311 | return Token::Type::Compute; |
| 312 | if (substring == "numthreads" ) |
| 313 | return Token::Type::NumThreads; |
| 314 | if (substring == "SV_InstanceID" ) |
| 315 | return Token::Type::SVInstanceID; |
| 316 | if (substring == "SV_VertexID" ) |
| 317 | return Token::Type::SVVertexID; |
| 318 | if (substring == "PSIZE" ) |
| 319 | return Token::Type::PSize; |
| 320 | if (substring == "SV_Position" ) |
| 321 | return Token::Type::SVPosition; |
| 322 | if (substring == "SV_IsFrontFace" ) |
| 323 | return Token::Type::SVIsFrontFace; |
| 324 | if (substring == "SV_SampleIndex" ) |
| 325 | return Token::Type::SVSampleIndex; |
| 326 | if (substring == "SV_InnerCoverage" ) |
| 327 | return Token::Type::SVInnerCoverage; |
| 328 | if (substring == "SV_Target" ) // FIXME: https://bugs.webkit.org/show_bug.cgi?id=195807 Make this work with strings like "SV_Target0". |
| 329 | return Token::Type::SVTarget; |
| 330 | if (substring == "SV_Depth" ) |
| 331 | return Token::Type::SVDepth; |
| 332 | if (substring == "SV_Coverage" ) |
| 333 | return Token::Type::SVCoverage; |
| 334 | if (substring == "SV_DispatchThreadID" ) |
| 335 | return Token::Type::SVDispatchThreadID; |
| 336 | if (substring == "SV_GroupID" ) |
| 337 | return Token::Type::SVGroupID; |
| 338 | if (substring == "SV_GroupIndex" ) |
| 339 | return Token::Type::SVGroupIndex; |
| 340 | if (substring == "SV_GroupThreadID" ) |
| 341 | return Token::Type::SVGroupThreadID; |
| 342 | if (substring == "attribute" ) |
| 343 | return Token::Type::Attribute; |
| 344 | if (substring == "register" ) |
| 345 | return Token::Type::Register; |
| 346 | if (substring == "specialized" ) |
| 347 | return Token::Type::Specialized; |
| 348 | if (substring == "native" ) |
| 349 | return Token::Type::Native; |
| 350 | if (substring == "restricted" ) |
| 351 | return Token::Type::Restricted; |
| 352 | if (substring == "_" ) |
| 353 | return Token::Type::Underscore; |
| 354 | if (substring == "auto" ) |
| 355 | return Token::Type::Auto; |
| 356 | if (substring == "protocol" ) |
| 357 | return Token::Type::Protocol; |
| 358 | if (substring == "const" ) |
| 359 | return Token::Type::Const; |
| 360 | if (substring == "static" ) |
| 361 | return Token::Type::Static; |
| 362 | if (substring == "nointerpolation" ) |
| 363 | return Token::Type::Qualifier; |
| 364 | if (substring == "noperspective" ) |
| 365 | return Token::Type::Qualifier; |
| 366 | if (substring == "uniform" ) |
| 367 | return Token::Type::Qualifier; |
| 368 | if (substring == "centroid" ) |
| 369 | return Token::Type::Qualifier; |
| 370 | if (substring == "sample" ) |
| 371 | return Token::Type::Qualifier; |
| 372 | return WTF::nullopt; |
| 373 | } |
| 374 | |
| 375 | auto Lexer::consumeTokenFromStream() -> Optional<Token> |
| 376 | { |
| 377 | auto prepare = [&](unsigned newOffset, Token::Type type) -> Optional<Token> { |
| 378 | auto oldOffset = m_offset; |
| 379 | m_offset = newOffset; |
| 380 | skipWhitespaceAndComments(); |
| 381 | return {{ m_stringView.substring(oldOffset, newOffset - oldOffset), m_lineNumber, type }}; |
| 382 | }; |
| 383 | |
| 384 | if (auto newOffset = floatLiteral(m_offset)) |
| 385 | return prepare(*newOffset, Token::Type::FloatLiteral); |
| 386 | if (auto newOffset = uintLiteral(m_offset)) |
| 387 | return prepare(*newOffset, Token::Type::UintLiteral); |
| 388 | if (auto newOffset = intLiteral(m_offset)) |
| 389 | return prepare(*newOffset, Token::Type::IntLiteral); |
| 390 | if (auto newOffset = operatorName(m_offset)) |
| 391 | return prepare(*newOffset, Token::Type::OperatorName); |
| 392 | if (auto newOffset = identifier(m_offset)) { |
| 393 | if (auto result = recognizeKeyword(*newOffset)) |
| 394 | return prepare(*newOffset, *result); |
| 395 | return prepare(*newOffset, Token::Type::Identifier); |
| 396 | } |
| 397 | // Sorted by length, so longer matches are preferable to shorter matches. |
| 398 | if (auto newOffset = string(">>=" , m_offset)) |
| 399 | return prepare(*newOffset, Token::Type::RightShiftEquals); |
| 400 | if (auto newOffset = string("<<=" , m_offset)) |
| 401 | return prepare(*newOffset, Token::Type::LeftShiftEquals); |
| 402 | if (auto newOffset = string("+=" , m_offset)) |
| 403 | return prepare(*newOffset, Token::Type::PlusEquals); |
| 404 | if (auto newOffset = string("-=" , m_offset)) |
| 405 | return prepare(*newOffset, Token::Type::MinusEquals); |
| 406 | if (auto newOffset = string("*=" , m_offset)) |
| 407 | return prepare(*newOffset, Token::Type::TimesEquals); |
| 408 | if (auto newOffset = string("/=" , m_offset)) |
| 409 | return prepare(*newOffset, Token::Type::DivideEquals); |
| 410 | if (auto newOffset = string("%=" , m_offset)) |
| 411 | return prepare(*newOffset, Token::Type::ModEquals); |
| 412 | if (auto newOffset = string("^=" , m_offset)) |
| 413 | return prepare(*newOffset, Token::Type::XorEquals); |
| 414 | if (auto newOffset = string("&=" , m_offset)) |
| 415 | return prepare(*newOffset, Token::Type::AndEquals); |
| 416 | if (auto newOffset = string("|=" , m_offset)) |
| 417 | return prepare(*newOffset, Token::Type::OrEquals); |
| 418 | if (auto newOffset = string("++" , m_offset)) |
| 419 | return prepare(*newOffset, Token::Type::PlusPlus); |
| 420 | if (auto newOffset = string("--" , m_offset)) |
| 421 | return prepare(*newOffset, Token::Type::MinusMinus); |
| 422 | if (auto newOffset = string("->" , m_offset)) |
| 423 | return prepare(*newOffset, Token::Type::Arrow); |
| 424 | if (auto newOffset = string("[]" , m_offset)) |
| 425 | return prepare(*newOffset, Token::Type::SquareBracketPair); |
| 426 | if (auto newOffset = string("||" , m_offset)) |
| 427 | return prepare(*newOffset, Token::Type::OrOr); |
| 428 | if (auto newOffset = string("&&" , m_offset)) |
| 429 | return prepare(*newOffset, Token::Type::AndAnd); |
| 430 | if (auto newOffset = string("<=" , m_offset)) |
| 431 | return prepare(*newOffset, Token::Type::LessThanOrEqualTo); |
| 432 | if (auto newOffset = string(">=" , m_offset)) |
| 433 | return prepare(*newOffset, Token::Type::GreaterThanOrEqualTo); |
| 434 | if (auto newOffset = string("==" , m_offset)) |
| 435 | return prepare(*newOffset, Token::Type::EqualComparison); |
| 436 | if (auto newOffset = string("!=" , m_offset)) |
| 437 | return prepare(*newOffset, Token::Type::NotEqual); |
| 438 | if (auto newOffset = string(">>" , m_offset)) |
| 439 | return prepare(*newOffset, Token::Type::RightShift); |
| 440 | if (auto newOffset = string("<<" , m_offset)) |
| 441 | return prepare(*newOffset, Token::Type::LeftShift); |
| 442 | if (auto newOffset = character('=', m_offset)) |
| 443 | return prepare(*newOffset, Token::Type::EqualsSign); |
| 444 | if (auto newOffset = character(';', m_offset)) |
| 445 | return prepare(*newOffset, Token::Type::Semicolon); |
| 446 | if (auto newOffset = character('{', m_offset)) |
| 447 | return prepare(*newOffset, Token::Type::LeftCurlyBracket); |
| 448 | if (auto newOffset = character('}', m_offset)) |
| 449 | return prepare(*newOffset, Token::Type::RightCurlyBracket); |
| 450 | if (auto newOffset = character(':', m_offset)) |
| 451 | return prepare(*newOffset, Token::Type::Colon); |
| 452 | if (auto newOffset = character(',', m_offset)) |
| 453 | return prepare(*newOffset, Token::Type::Comma); |
| 454 | if (auto newOffset = character('(', m_offset)) |
| 455 | return prepare(*newOffset, Token::Type::LeftParenthesis); |
| 456 | if (auto newOffset = character(')', m_offset)) |
| 457 | return prepare(*newOffset, Token::Type::RightParenthesis); |
| 458 | if (auto newOffset = character('[', m_offset)) |
| 459 | return prepare(*newOffset, Token::Type::LeftSquareBracket); |
| 460 | if (auto newOffset = character(']', m_offset)) |
| 461 | return prepare(*newOffset, Token::Type::RightSquareBracket); |
| 462 | if (auto newOffset = character('*', m_offset)) |
| 463 | return prepare(*newOffset, Token::Type::Star); |
| 464 | if (auto newOffset = character('<', m_offset)) |
| 465 | return prepare(*newOffset, Token::Type::LessThanSign); |
| 466 | if (auto newOffset = character('>', m_offset)) |
| 467 | return prepare(*newOffset, Token::Type::GreaterThanSign); |
| 468 | if (auto newOffset = character('.', m_offset)) |
| 469 | return prepare(*newOffset, Token::Type::FullStop); |
| 470 | if (auto newOffset = character('?', m_offset)) |
| 471 | return prepare(*newOffset, Token::Type::QuestionMark); |
| 472 | if (auto newOffset = character('|', m_offset)) |
| 473 | return prepare(*newOffset, Token::Type::Or); |
| 474 | if (auto newOffset = character('^', m_offset)) |
| 475 | return prepare(*newOffset, Token::Type::Xor); |
| 476 | if (auto newOffset = character('&', m_offset)) |
| 477 | return prepare(*newOffset, Token::Type::And); |
| 478 | if (auto newOffset = character('+', m_offset)) |
| 479 | return prepare(*newOffset, Token::Type::Plus); |
| 480 | if (auto newOffset = character('-', m_offset)) |
| 481 | return prepare(*newOffset, Token::Type::Minus); |
| 482 | if (auto newOffset = character('/', m_offset)) |
| 483 | return prepare(*newOffset, Token::Type::Divide); |
| 484 | if (auto newOffset = character('%', m_offset)) |
| 485 | return prepare(*newOffset, Token::Type::Mod); |
| 486 | if (auto newOffset = character('~', m_offset)) |
| 487 | return prepare(*newOffset, Token::Type::Tilde); |
| 488 | if (auto newOffset = character('!', m_offset)) |
| 489 | return prepare(*newOffset, Token::Type::ExclamationPoint); |
| 490 | if (auto newOffset = character('@', m_offset)) |
| 491 | return prepare(*newOffset, Token::Type::At); |
| 492 | |
| 493 | return WTF::nullopt; |
| 494 | } |
| 495 | |
| 496 | void Lexer::skipWhitespaceAndComments() |
| 497 | { |
| 498 | unsigned savedOffset; |
| 499 | do { |
| 500 | savedOffset = m_offset; |
| 501 | skipWhitespace(); |
| 502 | skipLineComment(); |
| 503 | skipLongComment(); |
| 504 | } while (savedOffset != m_offset); |
| 505 | } |
| 506 | |
| 507 | static inline bool isWhitespace(UChar codeUnit) |
| 508 | { |
| 509 | switch (codeUnit) { |
| 510 | case ' ': |
| 511 | case '\t': |
| 512 | case '\r': |
| 513 | case '\n': |
| 514 | return true; |
| 515 | default: |
| 516 | return false; |
| 517 | } |
| 518 | } |
| 519 | |
| 520 | static inline bool isNewline(UChar codeUnit) |
| 521 | { |
| 522 | switch (codeUnit) { |
| 523 | case '\r': |
| 524 | case '\n': |
| 525 | return true; |
| 526 | default: |
| 527 | return false; |
| 528 | } |
| 529 | } |
| 530 | |
// We can take advantage of two properties of Unicode:
// 1. The constituent UTF-16 code units for all non-BMP code points are surrogates,
// which means we'll never see a false match. If we see a BMP code unit, we
// really have a BMP code point.
// 2. Everything we're looking for is in the BMP.
| 536 | |
| 537 | void Lexer::skipWhitespace() |
| 538 | { |
| 539 | for ( ; m_offset < m_stringView.length() && isWhitespace(m_stringView[m_offset]); ++m_offset) { |
| 540 | if (m_stringView[m_offset] == '\r' && m_offset + 1 < m_stringView.length() && m_stringView[m_offset + 1] == '\n') { |
| 541 | ++m_offset; |
| 542 | ++m_lineNumber; |
| 543 | } else if (isNewline(m_stringView[m_offset])) |
| 544 | ++m_lineNumber; |
| 545 | } |
| 546 | } |
| 547 | |
| 548 | void Lexer::skipLineComment() |
| 549 | { |
| 550 | if (m_offset + 1 >= m_stringView.length() || m_stringView[m_offset] != '/' || m_stringView[m_offset + 1] != '/') |
| 551 | return; |
| 552 | |
| 553 | m_offset += 2; |
| 554 | for ( ; m_offset < m_stringView.length() && !isNewline(m_stringView[m_offset]); ++m_offset) { } |
| 555 | } |
| 556 | |
| 557 | void Lexer::skipLongComment() |
| 558 | { |
| 559 | if (m_offset + 1 >= m_stringView.length() || m_stringView[m_offset] != '/' || m_stringView[m_offset + 1] != '*') |
| 560 | return; |
| 561 | |
| 562 | m_offset += 2; |
| 563 | do { |
| 564 | for ( ; m_offset < m_stringView.length() && m_stringView[m_offset] != '*'; ++m_offset) { |
| 565 | if (m_stringView[m_offset] == '\r' && m_offset + 1 < m_stringView.length() && m_stringView[m_offset + 1] == '\n') { |
| 566 | ++m_offset; |
| 567 | ++m_lineNumber; |
| 568 | } else if (isNewline(m_stringView[m_offset])) |
| 569 | ++m_lineNumber; |
| 570 | } |
| 571 | if (m_offset < m_stringView.length()) |
| 572 | ++m_offset; |
| 573 | if (m_offset < m_stringView.length() && m_stringView[m_offset] == '/') { |
| 574 | ++m_offset; |
| 575 | break; |
| 576 | } |
| 577 | } while (m_offset < m_stringView.length()); |
| 578 | } |
| 579 | |
// Regular expressions are unnecessary; we shouldn't need to compile them.
| 581 | |
| 582 | Optional<unsigned> Lexer::coreDecimalIntLiteral(unsigned offset) const |
| 583 | { |
| 584 | if (offset >= m_stringView.length()) |
| 585 | return WTF::nullopt; |
| 586 | if (m_stringView[offset] == '0') |
| 587 | return offset + 1; |
| 588 | if (m_stringView[offset] >= '1' && m_stringView[offset] <= '9') { |
| 589 | ++offset; |
| 590 | for ( ; offset < m_stringView.length() && m_stringView[offset] >= '0' && m_stringView[offset] <= '9'; ++offset) { |
| 591 | } |
| 592 | return offset; |
| 593 | } |
| 594 | return WTF::nullopt; |
| 595 | } |
| 596 | |
| 597 | Optional<unsigned> Lexer::decimalIntLiteral(unsigned offset) const |
| 598 | { |
| 599 | if (offset < m_stringView.length() && m_stringView[offset] == '-') |
| 600 | ++offset; |
| 601 | return coreDecimalIntLiteral(offset); |
| 602 | } |
| 603 | |
| 604 | Optional<unsigned> Lexer::decimalUintLiteral(unsigned offset) const |
| 605 | { |
| 606 | auto result = coreDecimalIntLiteral(offset); |
| 607 | if (!result) |
| 608 | return WTF::nullopt; |
| 609 | if (*result < m_stringView.length() && m_stringView[*result] == 'u') |
| 610 | return *result + 1; |
| 611 | return WTF::nullopt; |
| 612 | } |
| 613 | |
| 614 | static inline bool isHexadecimalCharacter(UChar character) |
| 615 | { |
| 616 | return (character >= '0' && character <= '9') |
| 617 | || (character >= 'a' && character <= 'f') |
| 618 | || (character >= 'A' && character <= 'F'); |
| 619 | } |
| 620 | |
| 621 | Optional<unsigned> Lexer::coreHexadecimalIntLiteral(unsigned offset) const |
| 622 | { |
| 623 | if (offset + 1 >= m_stringView.length() || m_stringView[offset] != '0' || m_stringView[offset + 1] != 'x') |
| 624 | return WTF::nullopt; |
| 625 | |
| 626 | offset += 2; |
| 627 | if (offset >= m_stringView.length() || !isHexadecimalCharacter(m_stringView[offset])) |
| 628 | return WTF::nullopt; |
| 629 | ++offset; |
| 630 | for ( ; offset < m_stringView.length() && isHexadecimalCharacter(m_stringView[offset]); ++offset) { |
| 631 | } |
| 632 | return offset; |
| 633 | } |
| 634 | |
| 635 | Optional<unsigned> Lexer::hexadecimalIntLiteral(unsigned offset) const |
| 636 | { |
| 637 | if (offset < m_stringView.length() && m_stringView[offset] == '-') |
| 638 | ++offset; |
| 639 | return coreHexadecimalIntLiteral(offset); |
| 640 | } |
| 641 | |
| 642 | Optional<unsigned> Lexer::hexadecimalUintLiteral(unsigned offset) const |
| 643 | { |
| 644 | auto result = coreHexadecimalIntLiteral(offset); |
| 645 | if (!result) |
| 646 | return WTF::nullopt; |
| 647 | if (*result < m_stringView.length() && m_stringView[*result] == 'u') |
| 648 | return *result + 1; |
| 649 | return WTF::nullopt; |
| 650 | } |
| 651 | |
| 652 | Optional<unsigned> Lexer::intLiteral(unsigned offset) const |
| 653 | { |
| 654 | if (auto result = decimalIntLiteral(offset)) |
| 655 | return result; |
| 656 | if (auto result = hexadecimalIntLiteral(offset)) |
| 657 | return result; |
| 658 | return WTF::nullopt; |
| 659 | } |
| 660 | |
| 661 | Optional<unsigned> Lexer::uintLiteral(unsigned offset) const |
| 662 | { |
| 663 | if (auto result = decimalUintLiteral(offset)) |
| 664 | return result; |
| 665 | if (auto result = hexadecimalUintLiteral(offset)) |
| 666 | return result; |
| 667 | return WTF::nullopt; |
| 668 | } |
| 669 | |
| 670 | Optional<unsigned> Lexer::digit(unsigned offset) const |
| 671 | { |
| 672 | if (offset < m_stringView.length() && m_stringView[offset] >= '0' && m_stringView[offset] <= '9') |
| 673 | return offset + 1; |
| 674 | return WTF::nullopt; |
| 675 | } |
| 676 | |
| 677 | unsigned Lexer::digitStar(unsigned offset) const |
| 678 | { |
| 679 | while (true) { |
| 680 | auto result = digit(offset); |
| 681 | if (!result) |
| 682 | return offset; |
| 683 | offset = *result; |
| 684 | } |
| 685 | } |
| 686 | |
| 687 | Optional<unsigned> Lexer::character(char character, unsigned offset) const |
| 688 | { |
| 689 | if (offset < m_stringView.length() && m_stringView[offset] == character) |
| 690 | return offset + 1; |
| 691 | return WTF::nullopt; |
| 692 | } |
| 693 | |
| 694 | Optional<unsigned> Lexer::coreFloatLiteralType1(unsigned offset) const |
| 695 | { |
| 696 | auto result = digit(offset); |
| 697 | if (!result) |
| 698 | return WTF::nullopt; |
| 699 | auto result2 = digitStar(*result); |
| 700 | auto result3 = character('.', result2); |
| 701 | if (!result3) |
| 702 | return WTF::nullopt; |
| 703 | return digitStar(*result3); |
| 704 | } |
| 705 | |
| 706 | Optional<unsigned> Lexer::coreFloatLiteral(unsigned offset) const |
| 707 | { |
| 708 | if (auto type1 = coreFloatLiteralType1(offset)) |
| 709 | return type1; |
| 710 | auto result = digitStar(offset); |
| 711 | auto result2 = character('.', result); |
| 712 | if (!result2) |
| 713 | return WTF::nullopt; |
| 714 | auto result3 = digit(*result2); |
| 715 | if (!result3) |
| 716 | return WTF::nullopt; |
| 717 | return digitStar(*result3); |
| 718 | } |
| 719 | |
| 720 | Optional<unsigned> Lexer::floatLiteral(unsigned offset) const |
| 721 | { |
| 722 | if (offset < m_stringView.length() && m_stringView[offset] == '-') |
| 723 | ++offset; |
| 724 | auto result = coreFloatLiteral(offset); |
| 725 | if (!result) |
| 726 | return WTF::nullopt; |
| 727 | offset = *result; |
| 728 | if (offset < m_stringView.length() && m_stringView[offset] == 'f') |
| 729 | ++offset; |
| 730 | return offset; |
| 731 | } |
| 732 | |
| 733 | Optional<unsigned> Lexer::validIdentifier(unsigned offset) const |
| 734 | { |
| 735 | if (offset >= m_stringView.length() |
| 736 | || !((m_stringView[offset] >= 'a' && m_stringView[offset] <= 'z') |
| 737 | || (m_stringView[offset] >= 'A' && m_stringView[offset] <= 'Z') |
| 738 | || (m_stringView[offset] == '_'))) |
| 739 | return WTF::nullopt; |
| 740 | ++offset; |
| 741 | while (true) { |
| 742 | if (offset >= m_stringView.length() |
| 743 | || !((m_stringView[offset] >= 'a' && m_stringView[offset] <= 'z') |
| 744 | || (m_stringView[offset] >= 'A' && m_stringView[offset] <= 'Z') |
| 745 | || (m_stringView[offset] >= '0' && m_stringView[offset] <= '9') |
| 746 | || (m_stringView[offset] == '_'))) |
| 747 | return offset; |
| 748 | ++offset; |
| 749 | } |
| 750 | } |
| 751 | |
| 752 | Optional<unsigned> Lexer::identifier(unsigned offset) const |
| 753 | { |
| 754 | return validIdentifier(offset); |
| 755 | } |
| 756 | |
| 757 | Optional<unsigned> Lexer::operatorName(unsigned offset) const |
| 758 | { |
| 759 | if (auto result = string("operator&." , offset)) |
| 760 | return validIdentifier(*result); |
| 761 | if (auto result = string("operator." , offset)) { |
| 762 | if ((result = validIdentifier(*result))) { |
| 763 | if (auto result2 = character('=', *result)) |
| 764 | return result2; |
| 765 | return *result; |
| 766 | } |
| 767 | } |
| 768 | if (auto result = string("operator" , offset)) { |
| 769 | // Sorted by length, so longer matches are preferable to shorter matches. |
| 770 | if (auto result2 = string("&[]" , *result)) |
| 771 | return result2; |
| 772 | if (auto result2 = string("[]=" , *result)) |
| 773 | return result2; |
| 774 | if (auto result2 = string(">>" , *result)) |
| 775 | return result2; |
| 776 | if (auto result2 = string("<<" , *result)) |
| 777 | return result2; |
| 778 | if (auto result2 = string("++" , *result)) |
| 779 | return result2; |
| 780 | if (auto result2 = string("--" , *result)) |
| 781 | return result2; |
| 782 | if (auto result2 = string("&&" , *result)) |
| 783 | return result2; |
| 784 | if (auto result2 = string("||" , *result)) |
| 785 | return result2; |
| 786 | if (auto result2 = string(">=" , *result)) |
| 787 | return result2; |
| 788 | if (auto result2 = string("<=" , *result)) |
| 789 | return result2; |
| 790 | if (auto result2 = string("==" , *result)) |
| 791 | return result2; |
| 792 | if (auto result2 = string("[]" , *result)) |
| 793 | return result2; |
| 794 | if (auto result2 = character('+', *result)) |
| 795 | return result2; |
| 796 | if (auto result2 = character('-', *result)) |
| 797 | return result2; |
| 798 | if (auto result2 = character('*', *result)) |
| 799 | return result2; |
| 800 | if (auto result2 = character('/', *result)) |
| 801 | return result2; |
| 802 | if (auto result2 = character('%', *result)) |
| 803 | return result2; |
| 804 | if (auto result2 = character('<', *result)) |
| 805 | return result2; |
| 806 | if (auto result2 = character('>', *result)) |
| 807 | return result2; |
| 808 | if (auto result2 = character('!', *result)) |
| 809 | return result2; |
| 810 | if (auto result2 = character('~', *result)) |
| 811 | return result2; |
| 812 | if (auto result2 = character('&', *result)) |
| 813 | return result2; |
| 814 | if (auto result2 = character('^', *result)) |
| 815 | return result2; |
| 816 | if (auto result2 = character('|', *result)) |
| 817 | return result2; |
| 818 | } |
| 819 | return WTF::nullopt; |
| 820 | } |
| 821 | |
| 822 | } // namespace WHLSL |
| 823 | |
| 824 | } // namespace WebCore |
| 825 | |
| 826 | #endif // ENABLE(WEBGPU) |
| 827 | |