4 This script will find and report broken internal shelf/book/chapter/page links
5 within the HTML of shelf/book/chapter descriptions or page content.
7 This is very simplistic and is intended only as a quick helper.
8 This script is unsupported, may have issues, and may break with future BookStack versions.
11 Download the script to your system, then run it via artisan tinker on the BookStack instance.
12 The output will show scan progress, then list all detected links by shelf/book/chapter/page.
15 wget https://codeberg.org/bookstack/devops/raw/branch/main/tinker-scripts/find-broken-internal-references.php
16 php artisan tinker ./find-broken-internal-references.php --no-interaction
19 use BookStack\Entities\Models\Book;
20 use BookStack\Entities\Models\Bookshelf;
21 use BookStack\Entities\Models\Chapter;
22 use BookStack\Entities\Models\Entity;
23 use BookStack\Entities\Models\Page;
24 use BookStack\Entities\Queries\ChapterQueries;
25 use BookStack\Entities\Queries\PageQueries;
26 use Illuminate\Database\Eloquent\Builder;
/**
 * Extract the unique internal links found in the given HTML content.
 *
 * A link is any run of text that starts with the application's base URL
 * (as returned by url('/')) and continues up to, but not including, the first
 * whitespace, single/double quote, '#', '?' or '<' character, or the end of
 * the content. Fragments and query strings are therefore never part of a
 * returned link.
 *
 * @param string $content HTML to scan.
 *
 * @return array<int, string> Unique links. Keys are preserved from the match
 *                            list, so they may be non-sequential.
 */
function findLinksInContent(string $content): array
{
    // The single quote is a terminator alongside the double quote so that links
    // inside single-quoted attributes (href='...') are not over-captured.
    $pattern = '(' . preg_quote(url('/'), '/') . '.+?)([\s"\'#?<]|$)';

    $matches = [];
    preg_match_all("/$pattern/i", $content, $matches);

    // $matches[1] is absent if the regex engine failed; treat that as "no links"
    // rather than passing null into array_unique().
    return array_unique($matches[1] ?? []);
}
/**
 * Check whether the entity referenced by an internal link exists.
 *
 * URL formats are tested from most to least specific (page, page permalink,
 * chapter, book, shelf) because the generic book pattern would also match
 * page and chapter URLs.
 *
 * @param string $link The link to inspect.
 *
 * @return bool|null True/false when the link matches a recognised entity URL
 *                   format and the entity does/does not exist; null when the
 *                   link matches none of the recognised formats.
 */
function linkTargetExists(string $link): bool|null
{
    static $pageQueries = null;
    static $chapterQueries = null;

    // Resolve the query helpers from the container once; later calls reuse them.
    $pageQueries ??= app()->make(PageQueries::class);
    $chapterQueries ??= app()->make(ChapterQueries::class);

    // Page: /books/{bookSlug}/page/{pageSlug}
    if (preg_match('/\/books\/(.+?)\/page\/(.+?)([\/?#\s$]|$)/i', $link, $matches) === 1) {
        return $pageQueries->usingSlugs($matches[1], $matches[2])->exists();
    }

    // Page permalink: /link/{pageId}
    if (preg_match('/\/link\/(\d+?)([\/?#\s$]|$)/i', $link, $matches) === 1) {
        return Page::query()->where('id', '=', intval($matches[1]))->exists();
    }

    // Chapter: /books/{bookSlug}/chapter/{chapterSlug}
    if (preg_match('/\/books\/(.+?)\/chapter\/(.+?)([\/?#\s$]|$)/i', $link, $matches) === 1) {
        return $chapterQueries->usingSlugs($matches[1], $matches[2])->exists();
    }

    // Book: /books/{bookSlug}
    if (preg_match('/\/books\/(.+?)([\/?#\s$]|$)/i', $link, $matches) === 1) {
        return Book::query()->where('slug', '=', $matches[1])->exists();
    }

    // Shelf: /shelves/{shelfSlug}
    // The slug must be non-empty (.+?), as in the patterns above, so that a bare
    // "/shelves/" URL is not looked up with an empty slug and reported as broken.
    if (preg_match('/\/shelves\/(.+?)([\/?#\s$]|$)/i', $link, $matches) === 1) {
        return Bookshelf::query()->where('slug', '=', $matches[1])->exists();
    }

    // Not a recognised entity URL, so existence cannot be determined.
    return null;
}
/**
 * Tell whether a link points at an entity that is known not to exist.
 *
 * Only a definite "does not exist" answer (false) from linkTargetExists()
 * counts as broken; a null (unrecognised link) answer does not. Results are
 * memoised per link for the lifetime of the process.
 *
 * @param string $link The link to check.
 *
 * @return bool True when the link target is confirmed missing.
 */
function isLinkBroken(string $link): bool
{
    static $linkCache = [];

    if (!array_key_exists($link, $linkCache)) {
        $linkCache[$link] = linkTargetExists($link) === false;
    }

    return $linkCache[$link];
}
/**
 * Collect the broken internal links found in an entity's HTML field.
 *
 * The HTML is read from the attribute named by the entity's $htmlField
 * property.
 *
 * @param Entity $entity The entity whose HTML should be scanned.
 *
 * @return array<int, string> Broken links; keys are preserved from the list of
 *                            links found, so they may be non-sequential.
 */
function findBrokenLinksInEntity(Entity $entity): array
{
    // getAttribute() can return null (e.g. nothing stored for the field).
    // Passing null to the string-typed findLinksInContent() would throw a
    // TypeError and abort the whole scan, so normalise it to an empty string.
    $html = (string) ($entity->getAttribute($entity->htmlField) ?? '');
    if ($html === '') {
        return [];
    }

    return array_filter(
        findLinksInContent($html),
        static fn (string $link): bool => isLinkBroken($link),
    );
}
/**
 * Scan every entity selected by the given query for broken internal links.
 *
 * Entities are processed in batches, with a progress line echoed per batch.
 * For each entity that has at least one broken link, a separator line, a
 * heading containing the entity URL, and one line per broken link are added
 * to the returned report.
 *
 * @param Builder $baseQuery  Query selecting the entities to scan. It is cloned
 *                            before each use, so the passed instance is not
 *                            modified by this function.
 * @param string  $entityType Singular label used in the output, e.g. 'page'.
 *
 * @return string[] Report lines for this entity type (empty when nothing is broken).
 */
function reportForEntityType(Builder $baseQuery, string $entityType): array
{
    $total = $baseQuery->clone()->count();
    // NOTE(review): batch size is an assumed value; confirm it and tune to available memory.
    $count = 100;
    $current = 0;
    $logLines = [];

    // "shelf" does not pluralise by appending "s"; avoid printing "shelfs".
    $pluralType = $entityType === 'shelf' ? 'shelves' : "{$entityType}s";

    $baseQuery->clone()->chunk(
        $count,
        function ($entities) use ($count, &$current, $total, $entityType, $pluralType, &$logLines) {
            // Clamp so the final, partial batch does not report past the total.
            $rangeEnd = min($count + $current, $total);
            echo "Checking {$pluralType} [{$current}-{$rangeEnd} of {$total}]\n";

            foreach ($entities as $entity) {
                $brokenLinks = findBrokenLinksInEntity($entity);
                if (count($brokenLinks) === 0) {
                    continue;
                }

                $logLines[] = "------------------------------------------------------";
                $logLines[] = "Broken/Old URLs found in {$entityType} {$entity->getUrl()}";
                foreach ($brokenLinks as $brokenLink) {
                    $logLines[] = "|- $brokenLink";
                }
            }

            $current = $rangeEnd;
        }
    );

    return $logLines;
}
/**
 * Run the broken-link scan over shelves, books, chapters and pages, in that
 * order, then print a completion banner followed by the collected report lines.
 */
function findBrokenLinks(): void
{
    // Output label => model class, listed in scan order.
    $targets = [
        'shelf'   => Bookshelf::class,
        'book'    => Book::class,
        'chapter' => Chapter::class,
        'page'    => Page::class,
    ];

    $logLines = [];
    foreach ($targets as $entityType => $modelClass) {
        $logLines = array_merge($logLines, reportForEntityType($modelClass::query(), $entityType));
    }

    $banner = "\n=======================\nSearch Complete\n=======================\n\n";
    echo $banner, implode("\n", $logLines), "\n";
}