--- /dev/null
+<?php
+
+/**
+This script will find and report broken internal shelf/book/chapter/page links
+within the HTML of shelf/book/chapter descriptions or page content.
+
+This is very simplistic and is intended just as a quick helper.
+This script is unsupported, may have issues, and may break with future BookStack versions.
+
+== Usage ==
+Download the script to your system, then run it via artisan tinker on the BookStack instance.
+The output will show scan progress, then list all detected broken links by shelf/book/chapter/page.
+== Example ==
+cd /var/www/bookstack
+wget https://codeberg.org/bookstack/devops/raw/branch/main/tinker-scripts/find-broken-internal-references.php
+php artisan tinker ./find-broken-internal-references.php --no-interaction
+*/
+
+use BookStack\Entities\Models\Book;
+use BookStack\Entities\Models\Bookshelf;
+use BookStack\Entities\Models\Chapter;
+use BookStack\Entities\Models\Entity;
+use BookStack\Entities\Models\Page;
+use BookStack\Entities\Queries\ChapterQueries;
+use BookStack\Entities\Queries\PageQueries;
+use Illuminate\Database\Eloquent\Builder;
+
+/**
+ * Collect the unique URLs in the given content that start with the
+ * application URL, as returned by url('/').
+ *
+ * Matching is case-insensitive. Each URL runs from the application URL up to,
+ * but not including, the first whitespace, double quote, "#", "?" or "<"
+ * character, or to the end of the content.
+ *
+ * @return string[] Unique URLs; keys are the positions of their first occurrence among all matches.
+ */
+function findLinksInContent(string $content): array
+{
+    $quotedBaseUrl = preg_quote(url('/'), '/');
+    $regex = '/(' . $quotedBaseUrl . '.+?)([\s"#?<]|$)/i';
+
+    $found = [];
+    preg_match_all($regex, $content, $found);
+
+    // Group 1 holds the URL itself, without the terminating character.
+    return array_unique($found[1]);
+}
+
+/**
+ * Check whether the item referenced by the given internal link exists.
+ *
+ * The link is tested, in order, against the page, page permalink, chapter,
+ * book and shelf URL formats. The first format that matches decides which
+ * lookup is performed.
+ *
+ * @return bool|null True if the target exists, false if it does not,
+ *                   or null if the link matches none of the known formats.
+ */
+function linkTargetExists(string $link): bool|null
+{
+    static $pageQueries = null;
+    static $chapterQueries = null;
+
+    // Resolve the query helpers once and re-use them across calls.
+    $pageQueries ??= app()->make(PageQueries::class);
+    $chapterQueries ??= app()->make(ChapterQueries::class);
+
+    // A slug ends at a "/", "?", "#", whitespace, or the end of the link.
+    $end = '(?:[\/?#\s]|$)';
+
+    // Page
+    if (preg_match('/\/books\/(.+?)\/page\/(.+?)' . $end . '/i', $link, $matches)) {
+        return $pageQueries->usingSlugs($matches[1], $matches[2])->exists();
+    }
+
+    // Page permalink
+    if (preg_match('/\/link\/(\d+?)' . $end . '/i', $link, $matches)) {
+        return Page::query()->where('id', '=', intval($matches[1]))->exists();
+    }
+
+    // Chapter
+    if (preg_match('/\/books\/(.+?)\/chapter\/(.+?)' . $end . '/i', $link, $matches)) {
+        return $chapterQueries->usingSlugs($matches[1], $matches[2])->exists();
+    }
+
+    // Book
+    // No trailing slash is required after the slug, so that plain
+    // ".../books/my-book" links are checked rather than skipped.
+    if (preg_match('/\/books\/(.+?)' . $end . '/i', $link, $matches)) {
+        return Book::query()->where('slug', '=', $matches[1])->exists();
+    }
+
+    // Shelf
+    // As with books, the slug must be non-empty and need not be followed by a slash.
+    if (preg_match('/\/shelves\/(.+?)' . $end . '/i', $link, $matches)) {
+        return Bookshelf::query()->where('slug', '=', $matches[1])->exists();
+    }
+
+    // Not a recognised shelf/book/chapter/page link.
+    return null;
+}
+/**
+ * Decide whether a link points at a missing target, remembering the
+ * verdict per link so that each distinct link is only looked up once.
+ *
+ * A link which linkTargetExists() cannot classify (null result) is
+ * treated as not broken.
+ */
+function isLinkBroken(string $link): bool
+{
+    static $verdicts = [];
+
+    // Only an explicit "false" from the lookup counts as broken.
+    return $verdicts[$link] ??= (linkTargetExists($link) === false);
+}
+
+/**
+ * Find the broken internal links within the HTML content of the given entity.
+ *
+ * The HTML is read from the attribute named by the entity's `htmlField` property.
+ *
+ * @return string[] The broken links found; keys are preserved from the list of links scanned.
+ */
+function findBrokenLinksInEntity(Entity $entity): array
+{
+    // The attribute can come back as null (missing or stored as NULL). Treat that
+    // as empty content, since passing null to the string-typed parameter of
+    // findLinksInContent() would throw a TypeError and abort the whole scan.
+    $html = $entity->getAttribute($entity->htmlField) ?? '';
+    $allLinks = findLinksInContent($html);
+
+    return array_filter($allLinks, fn(string $link): bool => isLinkBroken($link));
+}
+
+/**
+ * Scan every entity returned by the given query for broken internal links.
+ *
+ * Entities are processed in chunks of 500, and a progress line is echoed
+ * before each chunk is checked. Report lines are collected rather than
+ * printed, so the caller decides when to output them.
+ *
+ * @param Builder $baseQuery  Query for the entities to scan; it is cloned, never modified.
+ * @param string  $entityType Singular label for the entity type (e.g. "page"), used in the output.
+ *
+ * @return string[] Report lines: a header block per affected entity, followed by its broken links.
+ */
+function reportForEntityType(Builder $baseQuery, string $entityType): array
+{
+    $chunkSize = 500;
+    $total = $baseQuery->clone()->count();
+    // Pluralise properly for the progress line, so "shelf" reads "shelves" and not "shelfs".
+    $typePlural = \Illuminate\Support\Str::plural($entityType);
+    $checked = 0;
+    $logLines = [];
+
+    $baseQuery->clone()->chunk($chunkSize, function ($entities) use ($chunkSize, $total, $typePlural, $entityType, &$checked, &$logLines) {
+        $rangeEnd = min($checked + $chunkSize, $total);
+        echo "Checking {$typePlural} [{$checked}-{$rangeEnd} of {$total}]\n";
+
+        foreach ($entities as $entity) {
+            $brokenLinks = findBrokenLinksInEntity($entity);
+            if (count($brokenLinks) === 0) {
+                continue;
+            }
+
+            $logLines[] = "------------------------------------------------------";
+            $logLines[] = "Broken/Old URLs found in {$entityType} {$entity->getUrl()}";
+            $logLines[] = "|";
+            foreach ($brokenLinks as $brokenLink) {
+                $logLines[] = "|- $brokenLink";
+            }
+        }
+
+        $checked = $rangeEnd;
+    });
+
+    return $logLines;
+}
+
+/**
+ * Check all shelves, books, chapters and pages (in that order) for broken
+ * internal links, then print a completion banner followed by the report
+ * lines gathered by reportForEntityType().
+ */
+function findBrokenLinks(): void
+{
+    $queriesByType = [
+        'shelf' => Bookshelf::query(),
+        'book' => Book::query(),
+        'chapter' => Chapter::query(),
+        'page' => Page::query(),
+    ];
+
+    $report = [];
+    foreach ($queriesByType as $type => $query) {
+        $report = array_merge($report, reportForEntityType($query, $type));
+    }
+
+    echo "\n=======================\nSearch Complete\n=======================\n\n";
+    echo implode("\n", $report), "\n";
+}
+
+// Script entry point: run the scan as soon as the file is loaded.
+findBrokenLinks();