{
protected static string $includeTagRegex = "/{{@\s?([0-9].*?)}}/";
+ /**
+ * Elements to clean up and remove if left empty after a parsing operation.
+ * @var DOMElement[]
+ */
+ protected array $toCleanup = [];
+
public function __construct(
protected string $pageHtml,
protected Closure $pageContentForId,
) {
}
+ /**
+ * Parse out the include tags.
+ */
public function parse(): string
{
$doc = new HtmlDocument($this->pageHtml);
$tags = $this->locateAndIsolateIncludeTags($doc);
- $topLevel = [...$doc->getBodyChildren()];
foreach ($tags as $tag) {
$htmlContent = $this->pageContentForId->call($this, $tag->getPageId());
$content = new PageIncludeContent($htmlContent, $tag);
if (!$content->isInline()) {
- $isParentTopLevel = in_array($tag->domNode->parentNode, $topLevel, true);
- if ($isParentTopLevel) {
+ $parentP = $this->getParentParagraph($tag->domNode);
+ $isWithinParentP = $parentP === $tag->domNode->parentNode;
+ if ($parentP && $isWithinParentP) {
$this->splitNodeAtChildNode($tag->domNode->parentNode, $tag->domNode);
- } else {
- $this->promoteTagNodeToBody($tag, $doc->getBody());
+ } else if ($parentP) {
+ $this->moveTagNodeToBesideParent($tag, $parentP);
}
}
$this->replaceNodeWithNodes($tag->domNode, $content->toDomNodes());
}
- // TODO Notes: May want to eventually parse through backwards, which should avoid issues
- // in changes affecting the next tag, where tags may be in the same/adjacent nodes.
+ $this->cleanup();
return $doc->getBodyInnerHtml();
}
*/
protected function locateAndIsolateIncludeTags(HtmlDocument $doc): array
{
- $includeHosts = $doc->queryXPath("//body//*[contains(text(), '{{@')]");
+ $includeHosts = $doc->queryXPath("//body//*[text()[contains(., '{{@')]]");
$includeTags = [];
/** @var DOMNode $node */
}
/**
+ * Replace the given node with all those in $replacements
* @param DOMNode[] $replacements
*/
protected function replaceNodeWithNodes(DOMNode $toReplace, array $replacements): void
$toReplace->parentNode->removeChild($toReplace);
}
- protected function promoteTagNodeToBody(PageIncludeTag $tag, DOMNode $body): void
+ /**
+ * Move a tag node to become a sibling of the given parent.
+ * Will attempt to guess a position based upon the tag content within the parent.
+ */
+ protected function moveTagNodeToBesideParent(PageIncludeTag $tag, DOMNode $parent): void
{
- /** @var DOMNode $topParent */
- $topParent = $tag->domNode->parentNode;
- while ($topParent->parentNode !== $body) {
- $topParent = $topParent->parentNode;
- }
-
- $parentText = $topParent->textContent;
+ $parentText = $parent->textContent;
$tagPos = strpos($parentText, $tag->tagContent);
$before = $tagPos < (strlen($parentText) / 2);
if ($before) {
- $body->insertBefore($tag->domNode, $topParent);
+ $parent->parentNode->insertBefore($tag->domNode, $parent);
} else {
- $body->insertBefore($tag->domNode, $topParent->nextSibling);
+ $parent->parentNode->insertBefore($tag->domNode, $parent->nextSibling);
}
}
+ /**
+ * Splits the given $parentNode at the location of the $domNode within it.
+ * Attempts replicate the original $parentNode, moving some of their parent
+ * children in where needed, before adding the $domNode between.
+ */
protected function splitNodeAtChildNode(DOMElement $parentNode, DOMNode $domNode): void
{
$children = [...$parentNode->childNodes];
- $splitPos = array_search($domNode, $children, true) ?: count($children);
+ $splitPos = array_search($domNode, $children, true);
+ if ($splitPos === false) {
+ $splitPos = count($children) - 1;
+ }
+
$parentClone = $parentNode->cloneNode();
+ $parentNode->parentNode->insertBefore($parentClone, $parentNode);
$parentClone->removeAttribute('id');
/** @var DOMNode $child */
for ($i = 0; $i < $splitPos; $i++) {
- $child = $children[0];
+ $child = $children[$i];
$parentClone->appendChild($child);
}
- if ($parentClone->hasChildNodes()) {
- $parentNode->parentNode->insertBefore($parentClone, $parentNode);
- }
-
$parentNode->parentNode->insertBefore($domNode, $parentNode);
- $parentClone->normalize();
- $parentNode->normalize();
- if (!$parentNode->hasChildNodes()) {
- $parentNode->remove();
- }
- if (!$parentClone->hasChildNodes()) {
- $parentClone->remove();
+ $this->toCleanup[] = $parentNode;
+ $this->toCleanup[] = $parentClone;
+ }
+
+ /**
+ * Get the parent paragraph of the given node, if existing.
+ */
+ protected function getParentParagraph(DOMNode $parent): ?DOMNode
+ {
+ do {
+ if (strtolower($parent->nodeName) === 'p') {
+ return $parent;
+ }
+
+ $parent = $parent->parentElement;
+ } while ($parent !== null);
+
+ return null;
+ }
+
+ /**
+ * Cleanup after a parse operation.
+ * Removes stranded elements we may have left during the parse.
+ */
+ protected function cleanup(): void
+ {
+ foreach ($this->toCleanup as $element) {
+ $element->normalize();
+ if ($element->parentNode && !$element->hasChildNodes()) {
+ $element->parentNode->removeChild($element);
+ }
}
}
}
);
}
+ public function test_complex_inline_text_within_other_text()
+ {
+ $this->runParserTest(
+ '<p>Hello {{@45#content}}there!</p>',
+ ['45' => '<p id="content"><strong>Testing</strong> with<em>some</em><i>extra</i>tags</p>'],
+ '<p>Hello <strong>Testing</strong> with<em>some</em><i>extra</i>tagsthere!</p>',
+ );
+ }
+
public function test_block_content_types()
{
$inputs = [
);
}
+ public function test_block_content_in_allowable_parent_element()
+ {
+ $this->runParserTest(
+ '<div>{{@45#content}}</div>',
+ ['45' => '<pre id="content">doggos</pre>'],
+ '<div><pre id="content">doggos</pre></div>',
+ );
+ }
+
+ public function test_block_content_in_paragraph_origin_with_allowable_grandparent()
+ {
+ $this->runParserTest(
+ '<div><p>{{@45#content}}</p></div>',
+ ['45' => '<pre id="content">doggos</pre>'],
+ '<div><pre id="content">doggos</pre></div>',
+ );
+ }
+
+ public function test_block_content_in_paragraph_origin_with_allowable_grandparent_with_adjacent_content()
+ {
+ $this->runParserTest(
+ '<div><p>Cute {{@45#content}} over there!</p></div>',
+ ['45' => '<pre id="content">doggos</pre>'],
+ '<div><p>Cute </p><pre id="content">doggos</pre><p> over there!</p></div>',
+ );
+ }
+
+ public function test_block_content_in_child_within_paragraph_origin_with_allowable_grandparent_with_adjacent_content()
+ {
+ $this->runParserTest(
+ '<div><p><strong>Cute {{@45#content}} over there!</strong></p></div>',
+ ['45' => '<pre id="content">doggos</pre>'],
+ '<div><pre id="content">doggos</pre><p><strong>Cute over there!</strong></p></div>',
+ );
+ }
+
+ public function test_block_content_in_paragraph_origin_within_details()
+ {
+ $this->runParserTest(
+ '<details><p>{{@45#content}}</p></details>',
+ ['45' => '<pre id="content">doggos</pre>'],
+ '<details><pre id="content">doggos</pre></details>',
+ );
+ }
+
public function test_simple_whole_document()
{
$this->runParserTest(
);
}
+ public function test_multiple_tags_in_same_origin_with_inline_content()
+ {
+ $this->runParserTest(
+ '<p>This {{@45#content}}{{@45#content}} content is {{@45#content}}</p>',
+ ['45' => '<p id="content">inline</p>'],
+ '<p>This inlineinline content is inline</p>',
+ );
+ }
+
+ public function test_multiple_tags_in_same_origin_with_block_content()
+ {
+ $this->runParserTest(
+ '<p>This {{@45#content}}{{@45#content}} content is {{@45#content}}</p>',
+ ['45' => '<pre id="content">block</pre>'],
+ '<p>This </p><pre id="content">block</pre><pre id="content">block</pre><p> content is </p><pre id="content">block</pre>',
+ );
+ }
+
+ public function test_multiple_tags_in_differing_origin_levels_with_block_content()
+ {
+ $this->runParserTest(
+ '<div><p>This <strong>{{@45#content}}</strong> content is {{@45#content}}</p>{{@45#content}}</div>',
+ ['45' => '<pre id="content">block</pre>'],
+ '<div><pre id="content">block</pre><p>This <strong></strong> content is </p><pre id="content">block</pre><pre id="content">block</pre></div>',
+ );
+ }
+
+ public function test_multiple_tags_in_shallow_origin_with_multi_block_content()
+ {
+ $this->runParserTest(
+ '<p>{{@45}}C{{@45}}</p><div>{{@45}}{{@45}}</div>',
+ ['45' => '<p>A</p><p>B</p>'],
+ '<p>A</p><p>B</p><p>C</p><p>A</p><p>B</p><div><p>A</p><p>B</p><p>A</p><p>B</p></div>',
+ );
+ }
+
protected function runParserTest(string $html, array $contentById, string $expected)
{
$parser = new PageIncludeParser($html, function (int $id) use ($contentById) {