| 1 | /* |
| 2 | * Copyright (C) 2011, 2013 Google Inc. All rights reserved. |
| 3 | * Copyright (C) 2013 Cable Television Labs, Inc. |
| 4 | * Copyright (C) 2011-2014 Apple Inc. All rights reserved. |
| 5 | * |
| 6 | * Redistribution and use in source and binary forms, with or without |
| 7 | * modification, are permitted provided that the following conditions are |
| 8 | * met: |
| 9 | * |
| 10 | * * Redistributions of source code must retain the above copyright |
| 11 | * notice, this list of conditions and the following disclaimer. |
| 12 | * * Redistributions in binary form must reproduce the above |
| 13 | * copyright notice, this list of conditions and the following disclaimer |
| 14 | * in the documentation and/or other materials provided with the |
| 15 | * distribution. |
| 16 | * * Neither the name of Google Inc. nor the names of its |
| 17 | * contributors may be used to endorse or promote products derived from |
| 18 | * this software without specific prior written permission. |
| 19 | * |
| 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 24 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 25 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 26 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 27 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 28 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 30 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 31 | */ |
| 32 | |
| 33 | #include "config.h" |
| 34 | |
| 35 | #if ENABLE(VIDEO_TRACK) |
| 36 | |
| 37 | #include "WebVTTParser.h" |
| 38 | |
| 39 | #include "HTMLParserIdioms.h" |
| 40 | #include "ISOVTTCue.h" |
| 41 | #include "ProcessingInstruction.h" |
| 42 | #include "StyleRule.h" |
| 43 | #include "StyleRuleImport.h" |
| 44 | #include "StyleSheetContents.h" |
| 45 | #include "Text.h" |
| 46 | #include "VTTScanner.h" |
| 47 | #include "WebVTTElement.h" |
| 48 | #include "WebVTTTokenizer.h" |
| 49 | |
| 50 | namespace WebCore { |
| 51 | |
// Conversion factors used when turning the fields of a parsed WebVTT timestamp into seconds.
constexpr double secondsPerHour = 3600;
constexpr double secondsPerMinute = 60;
constexpr double secondsPerMillisecond = 0.001;

// Signature expected at the start of the first line of a WebVTT file, and its length.
// The pointer is constexpr (and therefore const) so it cannot be re-pointed at run time.
constexpr const char* fileIdentifier = "WEBVTT";
constexpr unsigned fileIdentifierLength = 6;

// Lengths of the "REGION" and "STYLE" block keywords; used to slice the keyword off a
// line before the remainder of that line is validated.
constexpr unsigned regionIdentifierLength = 6;
constexpr unsigned styleIdentifierLength = 5;
| 59 | |
| 60 | bool WebVTTParser::parseFloatPercentageValue(VTTScanner& valueScanner, float& percentage) |
| 61 | { |
| 62 | float number; |
| 63 | if (!valueScanner.scanFloat(number)) |
| 64 | return false; |
| 65 | // '%' must be present and at the end of the setting value. |
| 66 | if (!valueScanner.scan('%')) |
| 67 | return false; |
| 68 | |
| 69 | if (number < 0 || number > 100) |
| 70 | return false; |
| 71 | |
| 72 | percentage = number; |
| 73 | return true; |
| 74 | } |
| 75 | |
| 76 | bool WebVTTParser::parseFloatPercentageValuePair(VTTScanner& valueScanner, char delimiter, FloatPoint& valuePair) |
| 77 | { |
| 78 | float firstCoord; |
| 79 | if (!parseFloatPercentageValue(valueScanner, firstCoord)) |
| 80 | return false; |
| 81 | |
| 82 | if (!valueScanner.scan(delimiter)) |
| 83 | return false; |
| 84 | |
| 85 | float secondCoord; |
| 86 | if (!parseFloatPercentageValue(valueScanner, secondCoord)) |
| 87 | return false; |
| 88 | |
| 89 | valuePair = FloatPoint(firstCoord, secondCoord); |
| 90 | return true; |
| 91 | } |
| 92 | |
| 93 | WebVTTParser::WebVTTParser(WebVTTParserClient* client, ScriptExecutionContext* context) |
| 94 | : m_scriptExecutionContext(context) |
| 95 | , m_state(Initial) |
| 96 | , m_decoder(TextResourceDecoder::create("text/plain" , UTF8Encoding())) |
| 97 | , m_client(client) |
| 98 | { |
| 99 | } |
| 100 | |
| 101 | void WebVTTParser::getNewCues(Vector<RefPtr<WebVTTCueData>>& outputCues) |
| 102 | { |
| 103 | outputCues = WTFMove(m_cuelist); |
| 104 | } |
| 105 | |
| 106 | void WebVTTParser::getNewRegions(Vector<RefPtr<VTTRegion>>& outputRegions) |
| 107 | { |
| 108 | outputRegions = WTFMove(m_regionList); |
| 109 | } |
| 110 | |
| 111 | Vector<String> WebVTTParser::getStyleSheets() |
| 112 | { |
| 113 | return WTFMove(m_styleSheets); |
| 114 | } |
| 115 | |
| 116 | void WebVTTParser::(String&& data) |
| 117 | { |
| 118 | m_state = Initial; |
| 119 | m_lineReader.reset(); |
| 120 | m_lineReader.append(WTFMove(data)); |
| 121 | parse(); |
| 122 | } |
| 123 | |
| 124 | void WebVTTParser::parseBytes(const char* data, unsigned length) |
| 125 | { |
| 126 | m_lineReader.append(m_decoder->decode(data, length)); |
| 127 | parse(); |
| 128 | } |
| 129 | |
| 130 | void WebVTTParser::parseCueData(const ISOWebVTTCue& data) |
| 131 | { |
| 132 | auto cue = WebVTTCueData::create(); |
| 133 | |
| 134 | MediaTime startTime = data.presentationTime(); |
| 135 | cue->setStartTime(startTime); |
| 136 | cue->setEndTime(startTime + data.duration()); |
| 137 | |
| 138 | cue->setContent(data.cueText()); |
| 139 | cue->setId(data.id()); |
| 140 | cue->setSettings(data.settings()); |
| 141 | |
| 142 | MediaTime originalStartTime; |
| 143 | if (WebVTTParser::collectTimeStamp(data.originalStartTime(), originalStartTime)) |
| 144 | cue->setOriginalStartTime(originalStartTime); |
| 145 | |
| 146 | m_cuelist.append(WTFMove(cue)); |
| 147 | if (m_client) |
| 148 | m_client->newCuesParsed(); |
| 149 | } |
| 150 | |
| 151 | void WebVTTParser::flush() |
| 152 | { |
| 153 | m_lineReader.append(m_decoder->flush()); |
| 154 | m_lineReader.appendEndOfStream(); |
| 155 | parse(); |
| 156 | flushPendingCue(); |
| 157 | } |
| 158 | |
| 159 | void WebVTTParser::parse() |
| 160 | { |
| 161 | // WebVTT parser algorithm. (5.1 WebVTT file parsing.) |
| 162 | // Steps 1 - 3 - Initial setup. |
| 163 | while (auto line = m_lineReader.nextLine()) { |
| 164 | switch (m_state) { |
| 165 | case Initial: |
| 166 | // Steps 4 - 9 - Check for a valid WebVTT signature. |
| 167 | if (!hasRequiredFileIdentifier(*line)) { |
| 168 | if (m_client) |
| 169 | m_client->fileFailedToParse(); |
| 170 | return; |
| 171 | } |
| 172 | |
| 173 | m_state = Header; |
| 174 | break; |
| 175 | |
| 176 | case Header: |
| 177 | // Steps 11 - 14 - Collect WebVTT block |
| 178 | m_state = collectWebVTTBlock(*line); |
| 179 | break; |
| 180 | |
| 181 | case Region: |
| 182 | m_state = collectRegionSettings(*line); |
| 183 | break; |
| 184 | |
| 185 | case Style: |
| 186 | m_state = collectStyleSheet(*line); |
| 187 | break; |
| 188 | |
| 189 | case Id: |
| 190 | // Steps 17 - 20 - Allow any number of line terminators, then initialize new cue values. |
| 191 | if (line->isEmpty()) |
| 192 | break; |
| 193 | |
| 194 | // Step 21 - Cue creation (start a new cue). |
| 195 | resetCueValues(); |
| 196 | |
| 197 | // Steps 22 - 25 - Check if this line contains an optional identifier or timing data. |
| 198 | m_state = collectCueId(*line); |
| 199 | break; |
| 200 | |
| 201 | case TimingsAndSettings: |
| 202 | // Steps 26 - 27 - Discard current cue if the line is empty. |
| 203 | if (line->isEmpty()) { |
| 204 | m_state = Id; |
| 205 | break; |
| 206 | } |
| 207 | |
| 208 | // Steps 28 - 29 - Collect cue timings and settings. |
| 209 | m_state = collectTimingsAndSettings(*line); |
| 210 | break; |
| 211 | |
| 212 | case CueText: |
| 213 | // Steps 31 - 41 - Collect the cue text, create a cue, and add it to the output. |
| 214 | m_state = collectCueText(*line); |
| 215 | break; |
| 216 | |
| 217 | case BadCue: |
| 218 | // Steps 42 - 48 - Discard lines until an empty line or a potential timing line is seen. |
| 219 | m_state = ignoreBadCue(*line); |
| 220 | break; |
| 221 | |
| 222 | case Finished: |
| 223 | ASSERT_NOT_REACHED(); |
| 224 | break; |
| 225 | } |
| 226 | } |
| 227 | } |
| 228 | |
| 229 | void WebVTTParser::fileFinished() |
| 230 | { |
| 231 | ASSERT(m_state != Finished); |
| 232 | parseBytes("\n\n" , 2); |
| 233 | m_state = Finished; |
| 234 | } |
| 235 | |
| 236 | void WebVTTParser::flushPendingCue() |
| 237 | { |
| 238 | ASSERT(m_lineReader.isAtEndOfStream()); |
| 239 | // If we're in the CueText state when we run out of data, we emit the pending cue. |
| 240 | if (m_state == CueText) |
| 241 | createNewCue(); |
| 242 | } |
| 243 | |
| 244 | bool WebVTTParser::hasRequiredFileIdentifier(const String& line) |
| 245 | { |
| 246 | // A WebVTT file identifier consists of an optional BOM character, |
| 247 | // the string "WEBVTT" followed by an optional space or tab character, |
| 248 | // and any number of characters that are not line terminators ... |
| 249 | if (!line.startsWith(fileIdentifier)) |
| 250 | return false; |
| 251 | if (line.length() > fileIdentifierLength && !isHTMLSpace(line[fileIdentifierLength])) |
| 252 | return false; |
| 253 | return true; |
| 254 | } |
| 255 | |
| 256 | WebVTTParser::ParseState WebVTTParser::collectRegionSettings(const String& line) |
| 257 | { |
| 258 | // End of region block |
| 259 | if (checkAndStoreRegion(line)) |
| 260 | return checkAndRecoverCue(line); |
| 261 | |
| 262 | m_currentRegion->setRegionSettings(line); |
| 263 | return Region; |
| 264 | } |
| 265 | |
| 266 | WebVTTParser::ParseState WebVTTParser::collectWebVTTBlock(const String& line) |
| 267 | { |
| 268 | // collect a WebVTT block parsing. (WebVTT parser algorithm step 14) |
| 269 | |
| 270 | if (checkAndCreateRegion(line)) |
| 271 | return Region; |
| 272 | |
| 273 | if (checkStyleSheet(line)) |
| 274 | return Style; |
| 275 | |
| 276 | // Handle cue block. |
| 277 | ParseState state = checkAndRecoverCue(line); |
| 278 | if (state != Header) { |
| 279 | if (m_client) { |
| 280 | if (!m_regionList.isEmpty()) |
| 281 | m_client->newRegionsParsed(); |
| 282 | if (!m_styleSheets.isEmpty()) |
| 283 | m_client->newStyleSheetsParsed(); |
| 284 | } |
| 285 | if (!m_previousLine.isEmpty() && !m_previousLine.contains("-->" )) |
| 286 | m_currentId = m_previousLine; |
| 287 | |
| 288 | return state; |
| 289 | } |
| 290 | |
| 291 | // store previous line for cue id. |
| 292 | // length is more than 1 line clear m_previousLine and ignore line. |
| 293 | if (m_previousLine.isEmpty()) |
| 294 | m_previousLine = line; |
| 295 | else |
| 296 | m_previousLine = emptyString(); |
| 297 | |
| 298 | return state; |
| 299 | } |
| 300 | |
| 301 | WebVTTParser::ParseState WebVTTParser::checkAndRecoverCue(const String& line) |
| 302 | { |
| 303 | // parse cue timings and settings |
| 304 | if (line.contains("-->" )) { |
| 305 | ParseState state = recoverCue(line); |
| 306 | if (state != BadCue) |
| 307 | return state; |
| 308 | } |
| 309 | return Header; |
| 310 | } |
| 311 | |
| 312 | WebVTTParser::ParseState WebVTTParser::collectStyleSheet(const String& line) |
| 313 | { |
| 314 | // End of style block |
| 315 | if (checkAndStoreStyleSheet(line)) |
| 316 | return checkAndRecoverCue(line); |
| 317 | |
| 318 | m_currentStyleSheet.append(line); |
| 319 | return Style; |
| 320 | } |
| 321 | |
| 322 | bool WebVTTParser::checkAndCreateRegion(const String& line) |
| 323 | { |
| 324 | if (m_previousLine.contains("-->" )) |
| 325 | return false; |
| 326 | // line starts with the substring "REGION" and remaining characters |
| 327 | // zero or more U+0020 SPACE characters or U+0009 CHARACTER TABULATION |
| 328 | // (tab) characters expected other than these charecters it is invalid. |
| 329 | if (line.startsWith("REGION" ) && line.substring(regionIdentifierLength).isAllSpecialCharacters<isASpace>()) { |
| 330 | m_currentRegion = VTTRegion::create(*m_scriptExecutionContext); |
| 331 | return true; |
| 332 | } |
| 333 | return false; |
| 334 | } |
| 335 | |
| 336 | bool WebVTTParser::checkAndStoreRegion(const String& line) |
| 337 | { |
| 338 | if (!line.isEmpty() && !line.contains("-->" )) |
| 339 | return false; |
| 340 | |
| 341 | if (!m_currentRegion->id().isEmpty()) { |
| 342 | // If the text track list of regions regions contains a region |
| 343 | // with the same region identifier value as region, remove that region. |
| 344 | for (const auto& region : m_regionList) { |
| 345 | if (region->id() == m_currentRegion->id()) { |
| 346 | m_regionList.removeFirst(region); |
| 347 | break; |
| 348 | } |
| 349 | } |
| 350 | m_regionList.append(m_currentRegion); |
| 351 | } |
| 352 | m_currentRegion = nullptr; |
| 353 | return true; |
| 354 | } |
| 355 | |
| 356 | bool WebVTTParser::checkStyleSheet(const String& line) |
| 357 | { |
| 358 | if (m_previousLine.contains("-->" )) |
| 359 | return false; |
| 360 | // line starts with the substring "STYLE" and remaining characters |
| 361 | // zero or more U+0020 SPACE characters or U+0009 CHARACTER TABULATION |
| 362 | // (tab) characters expected other than these charecters it is invalid. |
| 363 | if (line.startsWith("STYLE" ) && line.substring(styleIdentifierLength).isAllSpecialCharacters<isASpace>()) |
| 364 | return true; |
| 365 | |
| 366 | return false; |
| 367 | } |
| 368 | |
| 369 | bool WebVTTParser::checkAndStoreStyleSheet(const String& line) |
| 370 | { |
| 371 | if (!line.isEmpty() && !line.contains("-->" )) |
| 372 | return false; |
| 373 | |
| 374 | auto styleSheet = WTFMove(m_currentStyleSheet); |
| 375 | |
| 376 | auto contents = StyleSheetContents::create(); |
| 377 | if (!contents->parseString(styleSheet)) |
| 378 | return true; |
| 379 | |
| 380 | auto& namespaceRules = contents->namespaceRules(); |
| 381 | if (namespaceRules.size()) |
| 382 | return true; |
| 383 | |
| 384 | auto& importRules = contents->importRules(); |
| 385 | if (importRules.size()) |
| 386 | return true; |
| 387 | |
| 388 | auto& childRules = contents->childRules(); |
| 389 | if (!childRules.size()) |
| 390 | return true; |
| 391 | |
| 392 | for (auto rule : childRules) { |
| 393 | if (!rule->isStyleRule()) |
| 394 | return true; |
| 395 | const auto& styleRule = downcast<StyleRule>(rule.get()); |
| 396 | |
| 397 | const auto& selectorList = styleRule->selectorList(); |
| 398 | if (selectorList.listSize() != 1) |
| 399 | return true; |
| 400 | auto selector = selectorList.selectorAt(0); |
| 401 | if (selector->selectorText() != "::cue" ) |
| 402 | return true; |
| 403 | } |
| 404 | |
| 405 | m_styleSheets.append(styleSheet); |
| 406 | return true; |
| 407 | } |
| 408 | |
| 409 | WebVTTParser::ParseState WebVTTParser::collectCueId(const String& line) |
| 410 | { |
| 411 | if (line.contains("-->" )) |
| 412 | return collectTimingsAndSettings(line); |
| 413 | m_currentId = line; |
| 414 | return TimingsAndSettings; |
| 415 | } |
| 416 | |
| 417 | WebVTTParser::ParseState WebVTTParser::collectTimingsAndSettings(const String& line) |
| 418 | { |
| 419 | if (line.isEmpty()) |
| 420 | return BadCue; |
| 421 | |
| 422 | VTTScanner input(line); |
| 423 | |
| 424 | // Collect WebVTT cue timings and settings. (5.3 WebVTT cue timings and settings parsing.) |
| 425 | // Steps 1 - 3 - Let input be the string being parsed and position be a pointer into input |
| 426 | input.skipWhile<isHTMLSpace<UChar>>(); |
| 427 | |
| 428 | // Steps 4 - 5 - Collect a WebVTT timestamp. If that fails, then abort and return failure. Otherwise, let cue's text track cue start time be the collected time. |
| 429 | if (!collectTimeStamp(input, m_currentStartTime)) |
| 430 | return BadCue; |
| 431 | |
| 432 | input.skipWhile<isHTMLSpace<UChar>>(); |
| 433 | |
| 434 | // Steps 6 - 9 - If the next three characters are not "-->", abort and return failure. |
| 435 | if (!input.scan("-->" )) |
| 436 | return BadCue; |
| 437 | |
| 438 | input.skipWhile<isHTMLSpace<UChar>>(); |
| 439 | |
| 440 | // Steps 10 - 11 - Collect a WebVTT timestamp. If that fails, then abort and return failure. Otherwise, let cue's text track cue end time be the collected time. |
| 441 | if (!collectTimeStamp(input, m_currentEndTime)) |
| 442 | return BadCue; |
| 443 | |
| 444 | input.skipWhile<isHTMLSpace<UChar>>(); |
| 445 | |
| 446 | // Step 12 - Parse the WebVTT settings for the cue (conducted in TextTrackCue). |
| 447 | m_currentSettings = input.restOfInputAsString(); |
| 448 | return CueText; |
| 449 | } |
| 450 | |
| 451 | WebVTTParser::ParseState WebVTTParser::collectCueText(const String& line) |
| 452 | { |
| 453 | // Step 34. |
| 454 | if (line.isEmpty()) { |
| 455 | createNewCue(); |
| 456 | return Id; |
| 457 | } |
| 458 | // Step 35. |
| 459 | if (line.contains("-->" )) { |
| 460 | // Step 39-40. |
| 461 | createNewCue(); |
| 462 | |
| 463 | // Step 41 - New iteration of the cue loop. |
| 464 | return recoverCue(line); |
| 465 | } |
| 466 | if (!m_currentContent.isEmpty()) |
| 467 | m_currentContent.append('\n'); |
| 468 | m_currentContent.append(line); |
| 469 | |
| 470 | return CueText; |
| 471 | } |
| 472 | |
| 473 | WebVTTParser::ParseState WebVTTParser::recoverCue(const String& line) |
| 474 | { |
| 475 | // Step 17 and 21. |
| 476 | resetCueValues(); |
| 477 | |
| 478 | // Step 22. |
| 479 | return collectTimingsAndSettings(line); |
| 480 | } |
| 481 | |
| 482 | WebVTTParser::ParseState WebVTTParser::ignoreBadCue(const String& line) |
| 483 | { |
| 484 | if (line.isEmpty()) |
| 485 | return Id; |
| 486 | if (line.contains("-->" )) |
| 487 | return recoverCue(line); |
| 488 | return BadCue; |
| 489 | } |
| 490 | |
| 491 | // A helper class for the construction of a "cue fragment" from the cue text. |
| 492 | class WebVTTTreeBuilder { |
| 493 | public: |
| 494 | WebVTTTreeBuilder(Document& document) |
| 495 | : m_document(document) { } |
| 496 | |
| 497 | Ref<DocumentFragment> buildFromString(const String& cueText); |
| 498 | |
| 499 | private: |
| 500 | void constructTreeFromToken(Document&); |
| 501 | |
| 502 | WebVTTToken m_token; |
| 503 | RefPtr<ContainerNode> m_currentNode; |
| 504 | Vector<AtomicString> m_languageStack; |
| 505 | Document& m_document; |
| 506 | }; |
| 507 | |
| 508 | Ref<DocumentFragment> WebVTTTreeBuilder::buildFromString(const String& cueText) |
| 509 | { |
| 510 | // Cue text processing based on |
| 511 | // 5.4 WebVTT cue text parsing rules, and |
| 512 | // 5.5 WebVTT cue text DOM construction rules. |
| 513 | auto fragment = DocumentFragment::create(m_document); |
| 514 | |
| 515 | if (cueText.isEmpty()) { |
| 516 | fragment->parserAppendChild(Text::create(m_document, emptyString())); |
| 517 | return fragment; |
| 518 | } |
| 519 | |
| 520 | m_currentNode = fragment.ptr(); |
| 521 | |
| 522 | WebVTTTokenizer tokenizer(cueText); |
| 523 | m_languageStack.clear(); |
| 524 | |
| 525 | while (tokenizer.nextToken(m_token)) |
| 526 | constructTreeFromToken(m_document); |
| 527 | |
| 528 | return fragment; |
| 529 | } |
| 530 | |
| 531 | Ref<DocumentFragment> WebVTTParser::createDocumentFragmentFromCueText(Document& document, const String& cueText) |
| 532 | { |
| 533 | WebVTTTreeBuilder treeBuilder(document); |
| 534 | return treeBuilder.buildFromString(cueText); |
| 535 | } |
| 536 | |
| 537 | void WebVTTParser::createNewCue() |
| 538 | { |
| 539 | auto cue = WebVTTCueData::create(); |
| 540 | cue->setStartTime(m_currentStartTime); |
| 541 | cue->setEndTime(m_currentEndTime); |
| 542 | cue->setContent(m_currentContent.toString()); |
| 543 | cue->setId(m_currentId); |
| 544 | cue->setSettings(m_currentSettings); |
| 545 | |
| 546 | m_cuelist.append(WTFMove(cue)); |
| 547 | if (m_client) |
| 548 | m_client->newCuesParsed(); |
| 549 | } |
| 550 | |
| 551 | void WebVTTParser::resetCueValues() |
| 552 | { |
| 553 | m_currentId = emptyString(); |
| 554 | m_currentSettings = emptyString(); |
| 555 | m_currentStartTime = MediaTime::zeroTime(); |
| 556 | m_currentEndTime = MediaTime::zeroTime(); |
| 557 | m_currentContent.clear(); |
| 558 | } |
| 559 | |
| 560 | bool WebVTTParser::collectTimeStamp(const String& line, MediaTime& timeStamp) |
| 561 | { |
| 562 | if (line.isEmpty()) |
| 563 | return false; |
| 564 | |
| 565 | VTTScanner input(line); |
| 566 | return collectTimeStamp(input, timeStamp); |
| 567 | } |
| 568 | |
| 569 | bool WebVTTParser::collectTimeStamp(VTTScanner& input, MediaTime& timeStamp) |
| 570 | { |
| 571 | // Collect a WebVTT timestamp (5.3 WebVTT cue timings and settings parsing.) |
| 572 | // Steps 1 - 4 - Initial checks, let most significant units be minutes. |
| 573 | enum Mode { minutes, hours }; |
| 574 | Mode mode = minutes; |
| 575 | |
| 576 | // Steps 5 - 7 - Collect a sequence of characters that are 0-9. |
| 577 | // If not 2 characters or value is greater than 59, interpret as hours. |
| 578 | int value1; |
| 579 | unsigned value1Digits = input.scanDigits(value1); |
| 580 | if (!value1Digits) |
| 581 | return false; |
| 582 | if (value1Digits != 2 || value1 > 59) |
| 583 | mode = hours; |
| 584 | |
| 585 | // Steps 8 - 11 - Collect the next sequence of 0-9 after ':' (must be 2 chars). |
| 586 | int value2; |
| 587 | if (!input.scan(':') || input.scanDigits(value2) != 2) |
| 588 | return false; |
| 589 | |
| 590 | // Step 12 - Detect whether this timestamp includes hours. |
| 591 | int value3; |
| 592 | if (mode == hours || input.match(':')) { |
| 593 | if (!input.scan(':') || input.scanDigits(value3) != 2) |
| 594 | return false; |
| 595 | } else { |
| 596 | value3 = value2; |
| 597 | value2 = value1; |
| 598 | value1 = 0; |
| 599 | } |
| 600 | |
| 601 | // Steps 13 - 17 - Collect next sequence of 0-9 after '.' (must be 3 chars). |
| 602 | int value4; |
| 603 | if (!input.scan('.') || input.scanDigits(value4) != 3) |
| 604 | return false; |
| 605 | if (value2 > 59 || value3 > 59) |
| 606 | return false; |
| 607 | |
| 608 | // Steps 18 - 19 - Calculate result. |
| 609 | timeStamp = MediaTime::createWithDouble((value1 * secondsPerHour) + (value2 * secondsPerMinute) + value3 + (value4 * secondsPerMillisecond)); |
| 610 | return true; |
| 611 | } |
| 612 | |
| 613 | static WebVTTNodeType tokenToNodeType(WebVTTToken& token) |
| 614 | { |
| 615 | switch (token.name().length()) { |
| 616 | case 1: |
| 617 | if (token.name()[0] == 'c') |
| 618 | return WebVTTNodeTypeClass; |
| 619 | if (token.name()[0] == 'v') |
| 620 | return WebVTTNodeTypeVoice; |
| 621 | if (token.name()[0] == 'b') |
| 622 | return WebVTTNodeTypeBold; |
| 623 | if (token.name()[0] == 'i') |
| 624 | return WebVTTNodeTypeItalic; |
| 625 | if (token.name()[0] == 'u') |
| 626 | return WebVTTNodeTypeUnderline; |
| 627 | break; |
| 628 | case 2: |
| 629 | if (token.name()[0] == 'r' && token.name()[1] == 't') |
| 630 | return WebVTTNodeTypeRubyText; |
| 631 | break; |
| 632 | case 4: |
| 633 | if (token.name()[0] == 'r' && token.name()[1] == 'u' && token.name()[2] == 'b' && token.name()[3] == 'y') |
| 634 | return WebVTTNodeTypeRuby; |
| 635 | if (token.name()[0] == 'l' && token.name()[1] == 'a' && token.name()[2] == 'n' && token.name()[3] == 'g') |
| 636 | return WebVTTNodeTypeLanguage; |
| 637 | break; |
| 638 | } |
| 639 | return WebVTTNodeTypeNone; |
| 640 | } |
| 641 | |
| 642 | void WebVTTTreeBuilder::constructTreeFromToken(Document& document) |
| 643 | { |
| 644 | // http://dev.w3.org/html5/webvtt/#webvtt-cue-text-dom-construction-rules |
| 645 | |
| 646 | switch (m_token.type()) { |
| 647 | case WebVTTTokenTypes::Character: { |
| 648 | m_currentNode->parserAppendChild(Text::create(document, m_token.characters())); |
| 649 | break; |
| 650 | } |
| 651 | case WebVTTTokenTypes::StartTag: { |
| 652 | WebVTTNodeType nodeType = tokenToNodeType(m_token); |
| 653 | if (nodeType == WebVTTNodeTypeNone) |
| 654 | break; |
| 655 | |
| 656 | WebVTTNodeType currentType = is<WebVTTElement>(*m_currentNode) ? downcast<WebVTTElement>(*m_currentNode).webVTTNodeType() : WebVTTNodeTypeNone; |
| 657 | // <rt> is only allowed if the current node is <ruby>. |
| 658 | if (nodeType == WebVTTNodeTypeRubyText && currentType != WebVTTNodeTypeRuby) |
| 659 | break; |
| 660 | |
| 661 | auto child = WebVTTElement::create(nodeType, document); |
| 662 | if (!m_token.classes().isEmpty()) |
| 663 | child->setAttributeWithoutSynchronization(classAttr, m_token.classes()); |
| 664 | |
| 665 | if (nodeType == WebVTTNodeTypeVoice) |
| 666 | child->setAttributeWithoutSynchronization(WebVTTElement::voiceAttributeName(), m_token.annotation()); |
| 667 | else if (nodeType == WebVTTNodeTypeLanguage) { |
| 668 | m_languageStack.append(m_token.annotation()); |
| 669 | child->setAttributeWithoutSynchronization(WebVTTElement::langAttributeName(), m_languageStack.last()); |
| 670 | } |
| 671 | if (!m_languageStack.isEmpty()) |
| 672 | child->setLanguage(m_languageStack.last()); |
| 673 | m_currentNode->parserAppendChild(child); |
| 674 | m_currentNode = WTFMove(child); |
| 675 | break; |
| 676 | } |
| 677 | case WebVTTTokenTypes::EndTag: { |
| 678 | WebVTTNodeType nodeType = tokenToNodeType(m_token); |
| 679 | if (nodeType == WebVTTNodeTypeNone) |
| 680 | break; |
| 681 | |
| 682 | // The only non-VTTElement would be the DocumentFragment root. (Text |
| 683 | // nodes and PIs will never appear as m_currentNode.) |
| 684 | if (!is<WebVTTElement>(*m_currentNode)) |
| 685 | break; |
| 686 | |
| 687 | WebVTTNodeType currentType = downcast<WebVTTElement>(*m_currentNode).webVTTNodeType(); |
| 688 | bool matchesCurrent = nodeType == currentType; |
| 689 | if (!matchesCurrent) { |
| 690 | // </ruby> auto-closes <rt> |
| 691 | if (currentType == WebVTTNodeTypeRubyText && nodeType == WebVTTNodeTypeRuby) { |
| 692 | if (m_currentNode->parentNode()) |
| 693 | m_currentNode = m_currentNode->parentNode(); |
| 694 | } else |
| 695 | break; |
| 696 | } |
| 697 | if (nodeType == WebVTTNodeTypeLanguage) |
| 698 | m_languageStack.removeLast(); |
| 699 | if (m_currentNode->parentNode()) |
| 700 | m_currentNode = m_currentNode->parentNode(); |
| 701 | break; |
| 702 | } |
| 703 | case WebVTTTokenTypes::TimestampTag: { |
| 704 | String = m_token.characters(); |
| 705 | MediaTime parsedTimeStamp; |
| 706 | if (WebVTTParser::collectTimeStamp(charactersString, parsedTimeStamp)) |
| 707 | m_currentNode->parserAppendChild(ProcessingInstruction::create(document, "timestamp" , charactersString)); |
| 708 | break; |
| 709 | } |
| 710 | default: |
| 711 | break; |
| 712 | } |
| 713 | } |
| 714 | |
| 715 | } |
| 716 | |
| 717 | #endif |
| 718 | |