]> BookStack Code Mirror - devops/commitdiff
Added broken internal references tinker script
author danb <redacted>
Tue, 15 Jul 2025 11:43:23 +0000 (13:43 +0200)
committer danb <redacted>
Tue, 15 Jul 2025 11:43:23 +0000 (13:43 +0200)
tinker-scripts/find-broken-internal-references.php [new file with mode: 0644]

diff --git a/tinker-scripts/find-broken-internal-references.php b/tinker-scripts/find-broken-internal-references.php
new file mode 100644 (file)
index 0000000..a35a091
--- /dev/null
@@ -0,0 +1,139 @@
+<?php
+
+/**
+This script will find and report broken internal shelf/book/chapter/page links
+within the HTML of shelf/book/chapter descriptions or page content.
+
+This is very simplistic and is intended only as a quick helper.
+This script is unsupported, may have issues, and may break with future BookStack versions.
+
+== Usage ==
+Download the script to your system, then run it via artisan tinker on the BookStack instance.
+The output will show scan progress, then list all detected links by shelf/book/chapter/page.
+== Example ==
+cd /var/www/bookstack
+wget https://codeberg.org/bookstack/devops/raw/branch/main/tinker-scripts/find-broken-internal-references.php
+php artisan tinker ./find-broken-internal-references.php --no-interaction
+*/
+
+use BookStack\Entities\Models\Book;
+use BookStack\Entities\Models\Bookshelf;
+use BookStack\Entities\Models\Chapter;
+use BookStack\Entities\Models\Entity;
+use BookStack\Entities\Models\Page;
+use BookStack\Entities\Queries\ChapterQueries;
+use BookStack\Entities\Queries\PageQueries;
+use Illuminate\Database\Eloquent\Builder;
+
+/**
+ * Extract the unique internal URLs found within the given HTML content.
+ *
+ * A URL is treated as internal when it starts with the application base URL
+ * (as given by url('/')) followed by a slash. Each captured URL stops before
+ * the first whitespace, quote, "#", "?" or "<" character, so fragments and
+ * query strings are never part of the returned links.
+ *
+ * @return string[] Unique links, in order of first appearance.
+ */
+function findLinksInContent(string $content): array
+{
+    // Trim any trailing slash so that exactly one slash is required after the
+    // base URL. This stops look-alike hosts (eg. "example.com" vs
+    // "example.community") from being treated as internal.
+    $baseUrl = preg_quote(rtrim(url('/'), '/'), '/');
+
+    // A negated character class is used, rather than a lazy ".+?", so that the
+    // capture can never run past a terminating character. Single quotes are
+    // included so that single-quoted attributes are handled.
+    $pattern = '/(' . $baseUrl . '\/[^\s"\'#?<]+)/i';
+
+    $matches = [];
+    // preg_match_all() returns false on a PCRE failure and 0 when nothing matched.
+    if (!preg_match_all($pattern, $content, $matches)) {
+        return [];
+    }
+
+    return array_values(array_unique($matches[1]));
+}
+
+/**
+ * Check whether the entity that an internal link points at exists.
+ *
+ * Recognised link formats, relative to the application base URL:
+ *  - /books/{bookSlug}/page/{pageSlug}
+ *  - /link/{pageId}
+ *  - /books/{bookSlug}/chapter/{chapterSlug}
+ *  - /books/{bookSlug}
+ *  - /shelves/{shelfSlug}
+ * Anything after the final segment (a further sub-path, query or fragment) is ignored.
+ *
+ * @return bool|null True/false for whether the target exists, or null when the
+ *                   link does not match any of the formats above.
+ */
+function linkTargetExists(string $link): bool|null
+{
+    static $pageQueries = null;
+    static $chapterQueries = null;
+    static $basePattern = null;
+
+    // Resolve the query helpers and build the base URL pattern once, then re-use them.
+    if (is_null($pageQueries)) {
+        $pageQueries = app()->make(PageQueries::class);
+        $chapterQueries = app()->make(ChapterQueries::class);
+        // Anchoring to the base URL prevents a "/books/" or "/shelves/" segment
+        // deeper inside some other path from being treated as an entity link.
+        $basePattern = '^' . preg_quote(rtrim(url('/'), '/'), '/');
+    }
+
+    // A single path segment: runs up to the next slash, query, fragment or whitespace.
+    $slug = '([^\/?#\s]+)';
+    // What may follow the last segment of interest: a terminator or the end of the link.
+    $end = '(?:[\/?#\s]|$)';
+    $matches = [];
+
+    // Page
+    if (preg_match('/' . $basePattern . '\/books\/' . $slug . '\/page\/' . $slug . $end . '/i', $link, $matches)) {
+        return $pageQueries->usingSlugs($matches[1], $matches[2])->exists();
+    }
+
+    // Page permalink
+    if (preg_match('/' . $basePattern . '\/link\/(\d+)' . $end . '/i', $link, $matches)) {
+        return Page::query()->where('id', '=', intval($matches[1]))->exists();
+    }
+
+    // Chapter
+    if (preg_match('/' . $basePattern . '\/books\/' . $slug . '\/chapter\/' . $slug . $end . '/i', $link, $matches)) {
+        return $chapterQueries->usingSlugs($matches[1], $matches[2])->exists();
+    }
+
+    // Book
+    // No slash is required after the slug, so plain "/books/{slug}" links are checked.
+    // This must come after the page & chapter checks since those links also start with "/books/".
+    if (preg_match('/' . $basePattern . '\/books\/' . $slug . $end . '/i', $link, $matches)) {
+        return Book::query()->where('slug', '=', $matches[1])->exists();
+    }
+
+    // Shelf
+    // As with books, no slash is required after the slug.
+    if (preg_match('/' . $basePattern . '\/shelves\/' . $slug . $end . '/i', $link, $matches)) {
+        return Bookshelf::query()->where('slug', '=', $matches[1])->exists();
+    }
+
+    return null;
+}
+/**
+ * Decide whether a link is broken, remembering the outcome per link so that
+ * a link seen more than once is only looked up a single time.
+ *
+ * Only a definite "does not exist" result counts as broken; links which
+ * linkTargetExists() cannot classify (null result) are not treated as broken.
+ */
+function isLinkBroken(string $link): bool
+{
+    static $knownResults = [];
+
+    if (!array_key_exists($link, $knownResults)) {
+        $knownResults[$link] = (linkTargetExists($link) === false);
+    }
+
+    return $knownResults[$link];
+}
+
+/**
+ * Find the broken internal links within the HTML content of a single entity.
+ *
+ * The content is read from the attribute named by the entity's $htmlField property.
+ *
+ * @return string[] The broken links found; empty when there are none.
+ */
+function findBrokenLinksInEntity(Entity $entity): array
+{
+    // getAttribute() can return null (eg. where no value is set). Passing null
+    // on to the string-typed findLinksInContent() would throw a TypeError and
+    // abort the whole scan, so missing content is treated as empty.
+    $html = (string) ($entity->getAttribute($entity->htmlField) ?? '');
+    if ($html === '') {
+        return [];
+    }
+
+    return array_filter(findLinksInContent($html), fn(string $link): bool => isLinkBroken($link));
+}
+
+/**
+ * Scan every entity returned by the given query for broken links, printing
+ * progress as it goes, and return report lines for the entities which have them.
+ *
+ * Entities are loaded in chunks of 500.
+ *
+ * @param Builder $baseQuery  Query selecting the entities to scan; it is cloned, not altered.
+ * @param string  $entityType Singular label used in the output (eg. "shelf", "book").
+ *
+ * @return string[] Report lines, grouped per entity that has broken links.
+ */
+function reportForEntityType(Builder $baseQuery, string $entityType): array
+{
+    $chunkSize = 500;
+    $total = $baseQuery->clone()->count();
+    $checked = 0;
+    $logLines = [];
+    // A plain "s" suffix would produce "shelfs" for the shelf type.
+    $plural = $entityType === 'shelf' ? 'shelves' : "{$entityType}s";
+
+    $baseQuery->clone()->chunk($chunkSize, function ($entities) use (&$checked, $total, $plural, $entityType, &$logLines) {
+        // Show a 1-based, non-overlapping range (1-500, 501-1000, ...) based
+        // on the number of entities actually received in this chunk.
+        $rangeStart = $checked + 1;
+        $rangeEnd = $checked + count($entities);
+        echo "Checking {$plural} [{$rangeStart}-{$rangeEnd} of {$total}]\n";
+
+        foreach ($entities as $entity) {
+            $brokenLinks = findBrokenLinksInEntity($entity);
+            if (count($brokenLinks) === 0) {
+                continue;
+            }
+
+            $logLines[] = "------------------------------------------------------";
+            $logLines[] = "Broken/Old URLs found in {$entityType} {$entity->getUrl()}";
+            $logLines[] = "|";
+            foreach ($brokenLinks as $brokenLink) {
+                $logLines[] = "|- $brokenLink";
+            }
+        }
+
+        $checked = $rangeEnd;
+    });
+
+    return $logLines;
+}
+
+/**
+ * Run the scan across shelves, books, chapters and pages (in that order),
+ * then print every collected report line below a "Search Complete" banner.
+ */
+function findBrokenLinks(): void
+{
+    // Entity type label => model class to query, listed in scan order.
+    $modelsByType = [
+        'shelf' => Bookshelf::class,
+        'book' => Book::class,
+        'chapter' => Chapter::class,
+        'page' => Page::class,
+    ];
+
+    $report = [];
+    foreach ($modelsByType as $type => $modelClass) {
+        $report = array_merge($report, reportForEntityType($modelClass::query(), $type));
+    }
+
+    echo "\n=======================\nSearch Complete\n=======================\n\n";
+
+    echo implode("\n", $report) . "\n";
+}
+
+// Entry point: run the full scan as soon as this script is loaded.
+findBrokenLinks();