Skip to content

Commit

Permalink
feature #48940 [DomCrawler] Add argument $normalizeWhitespace to `C…
Browse files Browse the repository at this point in the history
…rawler::innerText()` and make it return the first non-empty text (otsch)

This PR was merged into the 6.3 branch.

Discussion
----------

[DomCrawler] Add argument `$normalizeWhitespace` to `Crawler::innerText()` and make it return the first non-empty text

This is a new PR instead of #48684 with target branch 6.3 as requested.

| Q             | A
| ------------- | ---
| Branch?       | 6.3
| Bug fix?      | yes
| New feature?  | no
| Deprecations? | no
| Tickets       | Fix #48682
| License       | MIT

Commits
-------

bb0c214 [DomCrawler] Add argument `$normalizeWhitespace` to `Crawler::innerText()` and make it return the first non-empty text
  • Loading branch information
fabpot committed Jan 11, 2023
2 parents be5fbce + bb0c214 commit 991c2ba
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 10 deletions.
2 changes: 2 additions & 0 deletions src/Symfony/Component/DomCrawler/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ CHANGELOG
---

* Add `CrawlerSelectorCount` test constraint
* Add argument `$normalizeWhitespace` to `Crawler::innerText()`
* Make `Crawler::innerText()` return the first non-empty text

6.0
---
Expand Down
27 changes: 24 additions & 3 deletions src/Symfony/Component/DomCrawler/Crawler.php
Original file line number Diff line number Diff line change
Expand Up @@ -553,18 +553,34 @@ public function text(string $default = null, bool $normalizeWhitespace = true):
$text = $this->getNode(0)->nodeValue;

if ($normalizeWhitespace) {
return trim(preg_replace("/(?:[ \n\r\t\x0C]{2,}+|[\n\r\t\x0C])/", ' ', $text), " \n\r\t\x0C");
return $this->normalizeWhitespace($text);
}

return $text;
}

/**
* Returns only the inner text that is the direct descendant of the current node, excluding any child nodes.
*
* Only XML_TEXT_NODE children of the node returned by getNode(0) are inspected, in document order.
* With normalization enabled, the first child text that is not whitespace-only is returned in
* normalized form; with it disabled, the very first child text node is returned verbatim, even if it
* contains nothing but whitespace. An empty string is returned when no child text node qualifies.
*
* @param bool $normalizeWhitespace Whether whitespaces should be trimmed and normalized to single spaces
*/
public function innerText(): string
public function innerText(/* bool $normalizeWhitespace = true */): string
{
return $this->filterXPath('.//text()')->text();
// The argument is not declared in the signature (it is only hinted at in the inline comment),
// so it is read dynamically and defaults to true when the caller passes nothing.
$normalizeWhitespace = 1 <= \func_num_args() ? func_get_arg(0) : true;

foreach ($this->getNode(0)->childNodes as $childNode) {
// Ignore every child that is not a plain text node.
if (\XML_TEXT_NODE !== $childNode->nodeType) {
continue;
}
// Raw mode: hand back the first text node untouched, whitespace-only or not.
if (!$normalizeWhitespace) {
return $childNode->nodeValue;
}
// Normalized mode: skip whitespace-only text nodes and return the first one with real content.
if ('' !== trim($childNode->nodeValue)) {
return $this->normalizeWhitespace($childNode->nodeValue);
}
}

// No usable child text node was found.
return '';
}

/**
Expand Down Expand Up @@ -1189,4 +1205,9 @@ private function isValidHtml5Heading(string $heading): bool
{
return 1 === preg_match('/^\x{FEFF}?\s*(<!--[^>]*?-->\s*)*$/u', $heading);
}

/**
 * Collapses whitespace inside the given string and trims both ends.
 *
 * Any run of two or more whitespace characters (space, line feed, carriage return, tab, form feed)
 * and any single line feed, carriage return, tab or form feed is replaced by one space; the same
 * set of characters is then stripped from the start and the end of the result.
 */
private function normalizeWhitespace(string $string): string
{
    $collapsed = preg_replace("/(?:[ \n\r\t\x0C]{2,}+|[\n\r\t\x0C])/", ' ', $string);

    return trim($collapsed, " \n\r\t\x0C");
}
}
61 changes: 54 additions & 7 deletions src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -348,12 +348,56 @@ public function testText()
$this->assertSame('my value', $this->createTestCrawler(null)->filterXPath('//ol')->text('my value'));
}

// Data sets for testInnerText(): each row is
// [XPath query, expected text(), expected innerText(), expected innerText(false)].
public function testInnerText()
public function provideInnerTextExamples()
{
self::assertCount(1, $crawler = $this->createTestCrawler()->filterXPath('//*[@id="complex-element"]'));
return [
// Parent text placed before the child element.
[
'//*[@id="complex-elements"]/*[@class="one"]', // XPath query
'Parent text Child text', // Result of Crawler::text()
'Parent text', // Result of Crawler::innerText()
' Parent text ', // Result of Crawler::innerText(false)
],
// Child element first: the leading text node holds only whitespace.
[
'//*[@id="complex-elements"]/*[@class="two"]',
'Child text Parent text',
'Parent text',
' ',
],
// Parent text on both sides of the child element: only the first one is returned.
[
'//*[@id="complex-elements"]/*[@class="three"]',
'Parent text Child text Parent text',
'Parent text',
' Parent text ',
],
// Nothing but whitespace around the single child element.
[
'//*[@id="complex-elements"]/*[@class="four"]',
'Child text',
'',
' ',
],
// Two child elements separated by whitespace and no parent text at all.
[
'//*[@id="complex-elements"]/*[@class="five"]',
'Child text Another child',
'',
' ',
],
];
}

/**
* Checks text(), innerText() and innerText(false) against the expectations of one data set.
*
* @dataProvider provideInnerTextExamples
*/
public function testInnerText(
string $xPathQuery,
string $expectedText,
string $expectedInnerText,
string $expectedInnerTextNormalizeWhitespaceFalse,
) {
// Each query is expected to select exactly one element of the test document.
self::assertCount(1, $crawler = $this->createTestCrawler()->filterXPath($xPathQuery));

self::assertSame('Parent text Child text', $crawler->text());
self::assertSame('Parent text', $crawler->innerText());
// Compare the full text, the normalized inner text and the raw inner text in turn.
self::assertSame($expectedText, $crawler->text());
self::assertSame($expectedInnerText, $crawler->innerText());
self::assertSame($expectedInnerTextNormalizeWhitespaceFalse, $crawler->innerText(false));
}

public function testHtml()
Expand Down Expand Up @@ -1265,9 +1309,12 @@ public function createTestCrawler($uri = null)
<div id="child2" xmlns:foo="http://example.com"></div>
</div>
<div id="sibling"><img /></div>
<div id="complex-element">
Parent text
<span>Child text</span>
<div id="complex-elements">
<div class="one"> Parent text <span>Child text</span> </div>
<div class="two"> <span>Child text</span> Parent text </div>
<div class="three"> Parent text <span>Child text</span> Parent text </div>
<div class="four"> <span>Child text</span> </div>
<div class="five"><span>Child text</span> <span>Another child</span></div>
</div>
</body>
</html>
Expand Down

0 comments on commit 991c2ba

Please sign in to comment.