/** @var DOMNode $child */
foreach ($doc->getBodyChildren() as $child) {
$nodeName = $child->nodeName;
- $termCounts = $this->textToTermCountMap(trim($child->textContent));
+ $text = trim($child->textContent);
+ $text = str_replace("\u{00A0}", ' ', $text);
+ $termCounts = $this->textToTermCountMap($text);
foreach ($termCounts as $term => $count) {
$scoreChange = $count * ($elementScoreAdjustmentMap[$nodeName] ?? 1);
$scoresByTerm[$term] = ($scoresByTerm[$term] ?? 0) + $scoreChange;
$this->assertNull($scoreByTerm->get($term), "Failed asserting that \"$term\" is not indexed");
}
}
+
+ /**
+  * Words separated only by non-breaking spaces (U+00A0) must each be
+  * indexed as their own search term, exactly as if they were separated
+  * by regular spaces.
+  */
+ public function test_non_breaking_spaces_handled_as_spaces()
+ {
+     // The separators are written as explicit &nbsp; entities so that the
+     // fixture cannot be silently normalised to plain spaces by an editor
+     // or tooling, which would make this test pass without exercising the
+     // non-breaking-space handling at all.
+     $page = $this->entities->newPage(['html' => '<p>a&nbsp;tigerbadger&nbsp;is&nbsp;a&nbsp;dangerous&nbsp;animal</p>']);
+
+     // Map of term => score for everything recorded against the new page.
+     $scoreByTerm = $page->searchTerms()->pluck('score', 'term');
+
+     // Each word must be present as a separate term rather than being
+     // merged into one NBSP-joined token.
+     $this->assertNotNull($scoreByTerm->get('tigerbadger'));
+     $this->assertNotNull($scoreByTerm->get('dangerous'));
+     $this->assertNotNull($scoreByTerm->get('animal'));
+ }
}