Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DomCrawler] Add argument $normalizeWhitespace to Crawler::innerText() and make it return the first non-empty text #48940

Merged
merged 1 commit into from
Jan 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/Symfony/Component/DomCrawler/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ CHANGELOG
---

* Add `CrawlerSelectorCount` test constraint
* Add argument `$normalizeWhitespace` to `Crawler::innerText()`
* Make `Crawler::innerText()` return the first non-empty text

6.0
---
Expand Down
27 changes: 24 additions & 3 deletions src/Symfony/Component/DomCrawler/Crawler.php
Original file line number Diff line number Diff line change
Expand Up @@ -553,18 +553,34 @@ public function text(string $default = null, bool $normalizeWhitespace = true):
$text = $this->getNode(0)->nodeValue;

if ($normalizeWhitespace) {
return trim(preg_replace("/(?:[ \n\r\t\x0C]{2,}+|[\n\r\t\x0C])/", ' ', $text), " \n\r\t\x0C");
return $this->normalizeWhitespace($text);
}

return $text;
}

/**
* Returns only the inner text that is the direct descendant of the current node, excluding any child nodes.
*
* @param bool $normalizeWhitespace Whether whitespaces should be trimmed and normalized to single spaces
*/
public function innerText(): string
public function innerText(/* bool $normalizeWhitespace = true */): string
{
return $this->filterXPath('.//text()')->text();
$normalizeWhitespace = 1 <= \func_num_args() ? func_get_arg(0) : true;

foreach ($this->getNode(0)->childNodes as $childNode) {
if (\XML_TEXT_NODE !== $childNode->nodeType) {
continue;
}
if (!$normalizeWhitespace) {
return $childNode->nodeValue;
}
if ('' !== trim($childNode->nodeValue)) {
return $this->normalizeWhitespace($childNode->nodeValue);
}
}

return '';
nicolas-grekas marked this conversation as resolved.
Show resolved Hide resolved
}

/**
Expand Down Expand Up @@ -1189,4 +1205,9 @@ private function isValidHtml5Heading(string $heading): bool
{
return 1 === preg_match('/^\x{FEFF}?\s*(<!--[^>]*?-->\s*)*$/u', $heading);
}

/**
 * Collapses whitespace inside a string and trims it at both ends.
 *
 * Any run of two or more whitespace characters (space, LF, CR, tab, form feed),
 * as well as any single LF, CR, tab or form feed, is replaced by one space.
 * The same set of characters is then stripped from the start and end of the result.
 */
private function normalizeWhitespace(string $string): string
{
    $trimmedCharacters = " \n\r\t\x0C";
    $collapsed = preg_replace("/(?:[ \n\r\t\x0C]{2,}+|[\n\r\t\x0C])/", ' ', $string);

    return trim($collapsed, $trimmedCharacters);
}
}
61 changes: 54 additions & 7 deletions src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -348,12 +348,56 @@ public function testText()
$this->assertSame('my value', $this->createTestCrawler(null)->filterXPath('//ol')->text('my value'));
}

public function testInnerText()
/**
 * Data sets for testInnerText().
 *
 * The provider is static because PHPUnit 10 deprecates non-static data
 * providers; static providers are also accepted by older PHPUnit versions.
 *
 * Each data set contains, in order:
 *  - the XPath query selecting exactly one element of the test document,
 *  - the expected result of Crawler::text(),
 *  - the expected result of Crawler::innerText(),
 *  - the expected result of Crawler::innerText(false).
 *
 * @return list<array{string, string, string, string}>
 */
public static function provideInnerTextExamples(): array
{
    return [
        [
            '//*[@id="complex-elements"]/*[@class="one"]', // XPath query
            'Parent text Child text', // Result of Crawler::text()
            'Parent text', // Result of Crawler::innerText()
            ' Parent text ', // Result of Crawler::innerText(false)
        ],
        [
            '//*[@id="complex-elements"]/*[@class="two"]',
            'Child text Parent text',
            'Parent text',
            ' ',
        ],
        [
            '//*[@id="complex-elements"]/*[@class="three"]',
            'Parent text Child text Parent text',
            'Parent text',
            ' Parent text ',
        ],
        [
            '//*[@id="complex-elements"]/*[@class="four"]',
            'Child text',
            '',
            ' ',
        ],
        [
            '//*[@id="complex-elements"]/*[@class="five"]',
            'Child text Another child',
            '',
            ' ',
        ],
    ];
}

/**
* @dataProvider provideInnerTextExamples
*/
public function testInnerText(
string $xPathQuery,
string $expectedText,
string $expectedInnerText,
string $expectedInnerTextNormalizeWhitespaceFalse,
) {
self::assertCount(1, $crawler = $this->createTestCrawler()->filterXPath($xPathQuery));

self::assertSame('Parent text Child text', $crawler->text());
self::assertSame('Parent text', $crawler->innerText());
self::assertSame($expectedText, $crawler->text());
self::assertSame($expectedInnerText, $crawler->innerText());
self::assertSame($expectedInnerTextNormalizeWhitespaceFalse, $crawler->innerText(false));
}

public function testHtml()
Expand Down Expand Up @@ -1265,9 +1309,12 @@ public function createTestCrawler($uri = null)
<div id="child2" xmlns:foo="http://example.com"></div>
</div>
<div id="sibling"><img /></div>
<div id="complex-element">
Parent text
<span>Child text</span>
<div id="complex-elements">
<div class="one"> Parent text <span>Child text</span> </div>
<div class="two"> <span>Child text</span> Parent text </div>
<div class="three"> Parent text <span>Child text</span> Parent text </div>
<div class="four"> <span>Child text</span> </div>
<div class="five"><span>Child text</span> <span>Another child</span></div>
</div>
</body>
</html>
Expand Down