+ protected function generateTermScoreMapFromText(string $text, int $scoreAdjustment = 1): array
+ {
+     // Start from the per-term frequencies of the given text.
+     $countsByTerm = $this->textToTermCountMap($text);
+
+     // Weight every frequency by the adjustment factor. array_map() keeps the
+     // term keys intact because it is given exactly one array.
+     return array_map(function ($occurrences) use ($scoreAdjustment) {
+         return $occurrences * $scoreAdjustment;
+     }, $countsByTerm);
+ }
+
+ /**
+  * Create a scored term array from the given HTML, where the keys are the terms
+  * and the values are their scores.
+  *
+  * Only the top-level nodes of the HTML are inspected. Each one contributes the
+  * term counts of its text content multiplied by a weight looked up from its tag
+  * name: h1-h6 carry the weights listed below, every other node weighs 1.
+  * Scores can be fractional because h6 weighs 1.5.
+  *
+  * @return array<string, int|float>
+  */
+ protected function generateTermScoreMapFromHtml(string $html): array
+ {
+     // Compare against '' explicitly: empty() would also discard the valid content "0".
+     if ($html === '') {
+         return [];
+     }
+
+     $scoresByTerm = [];
+     $elementScoreAdjustmentMap = [
+         'h1' => 10,
+         'h2' => 5,
+         'h3' => 4,
+         'h4' => 3,
+         'h5' => 2,
+         'h6' => 1.5,
+     ];
+
+     // Wrap the fragment so its top-level nodes become children of a single body element.
+     $html = '<body>' . $html . '</body>';
+
+     // Collect libxml parse warnings internally instead of emitting them, and remember
+     // the previous setting so this process-wide switch can be restored afterwards.
+     $previousErrorSetting = libxml_use_internal_errors(true);
+     try {
+         $doc = new DOMDocument();
+         // Encode non-ASCII characters as numeric entities so that loadHTML() does not
+         // misread UTF-8 input. This replaces mb_convert_encoding() with 'HTML-ENTITIES',
+         // which is deprecated as of PHP 8.2.
+         $doc->loadHTML(mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8'));
+     } finally {
+         // Drop the buffered errors so they do not accumulate across calls.
+         libxml_clear_errors();
+         libxml_use_internal_errors($previousErrorSetting);
+     }
+
+     // Guard against a parse that produced no root element or no body element.
+     $root = $doc->documentElement;
+     $body = $root !== null ? $root->childNodes->item(0) : null;
+     if ($body === null) {
+         return $scoresByTerm;
+     }
+
+     /** @var DOMNode $child */
+     foreach ($body->childNodes as $child) {
+         // Nodes without an entry in the weight map (including text nodes) weigh 1.
+         $scoreAdjustment = $elementScoreAdjustmentMap[$child->nodeName] ?? 1;
+         $termCounts = $this->textToTermCountMap(trim($child->textContent));
+         foreach ($termCounts as $term => $count) {
+             $scoresByTerm[$term] = ($scoresByTerm[$term] ?? 0) + ($count * $scoreAdjustment);
+         }
+     }
+
+     return $scoresByTerm;
+ }
+
+ /**
+ * For the given text, return an array where the keys are the unique term words
+ * and the values are the frequency of that term.
+ *
+ * @return array<string, int>
+ */
+ protected function textToTermCountMap(string $text): array