]> BookStack Code Mirror - devops/blob - tinker-scripts/find-broken-internal-references.php
Update tinker-scripts/find-broken-internal-references.php
[devops] / tinker-scripts / find-broken-internal-references.php
1 <?php
2
3 /**
4 This script will find and report broken internal shelf/book/chapter/page links
5 within the HTML of shelf/book/chapter descriptions or page content.
6
7 This is very simplistic and is intended only as a quick helper.
8 This script is unsupported, may have issues, and may break with future BookStack versions.
9
10 == Usage ==
11 Download the script to your system, then run it via artisan tinker on the BookStack instance.
12 The output will show scan progress, then list all detected links by shelf/book/chapter/page.
13 == Example ==
14 cd /var/www/bookstack
15 wget https://codeberg.org/bookstack/devops/raw/branch/main/tinker-scripts/find-broken-internal-references.php
16 php artisan tinker ./find-broken-internal-references.php --no-interaction
17 */
18
19 use BookStack\Entities\Models\Book;
20 use BookStack\Entities\Models\Bookshelf;
21 use BookStack\Entities\Models\Chapter;
22 use BookStack\Entities\Models\Entity;
23 use BookStack\Entities\Models\Page;
24 use BookStack\Entities\Queries\ChapterQueries;
25 use BookStack\Entities\Queries\PageQueries;
26 use Illuminate\Database\Eloquent\Builder;
27
/**
 * Extract the unique URLs in the given HTML that start with this instance's base URL.
 *
 * A URL is taken to end at the first whitespace, single or double quote, "#", "?" or "<"
 * character, or at the end of the content, so query strings and fragments are not included.
 *
 * @param string $content HTML to scan.
 * @return string[] Unique matched URLs. Keys are those left by array_unique(),
 *                  so they are not guaranteed to be sequential.
 */
function findLinksInContent(string $content): array
{
    if ($content === '') {
        return [];
    }

    // The single quote is a terminator too, so that href='...' attributes do not
    // drag the markup that follows them into the captured URL.
    $pattern = '(' . preg_quote(url('/'), '/') . '.+?)([\s"\'#?<]|$)';
    $matches = [];
    $result = preg_match_all("/$pattern/i", $content, $matches);

    // preg_match_all() returns false on a PCRE error; report no links rather than
    // reading a capture group that was never populated.
    if ($result === false) {
        return [];
    }

    return array_unique($matches[1] ?? []);
}
35
/**
 * Check whether the item referenced by an internal link exists.
 *
 * The link is tested against the known URL shapes in order (page, page permalink,
 * chapter, book, shelf) and the first shape that matches decides which lookup runs.
 *
 * @param string $link URL to check.
 * @return bool|null True when the target was found, false when it was not,
 *                   null when the link does not match any recognised shape.
 */
function linkTargetExists(string $link): bool|null
{
    static $pageQueries = null;
    static $chapterQueries = null;

    // Resolve the query helpers from the container once, then reuse them on later calls.
    $pageQueries ??= app()->make(PageQueries::class);
    $chapterQueries ??= app()->make(ChapterQueries::class);

    // A slug or ID ends at one of these delimiter characters or at the end of the link.
    $end = '([\/?#\s$]|$)';

    // Page
    if (preg_match('/\/books\/(.+?)\/page\/(.+?)' . $end . '/i', $link, $matches)) {
        return $pageQueries->usingSlugs($matches[1], $matches[2])->exists();
    }

    // Page permalink
    if (preg_match('/\/link\/(\d+?)' . $end . '/i', $link, $matches)) {
        return Page::query()->where('id', '=', intval($matches[1]))->exists();
    }

    // Chapter
    if (preg_match('/\/books\/(.+?)\/chapter\/(.+?)' . $end . '/i', $link, $matches)) {
        return $chapterQueries->usingSlugs($matches[1], $matches[2])->exists();
    }

    // Book
    if (preg_match('/\/books\/(.+?)' . $end . '/i', $link, $matches)) {
        return Book::query()->where('slug', '=', $matches[1])->exists();
    }

    // Shelf. The slug must be non-empty, as for books, so that a bare "/shelves/"
    // URL is treated as unrecognised instead of as a lookup for an empty slug.
    if (preg_match('/\/shelves\/(.+?)' . $end . '/i', $link, $matches)) {
        return Bookshelf::query()->where('slug', '=', $matches[1])->exists();
    }

    return null;
}
/**
 * Tell whether a link points at an item that could not be found.
 *
 * Only a definite "not found" result (false) from linkTargetExists() counts as broken;
 * links it cannot classify (null) are not reported. Results are remembered per link
 * for the lifetime of the process, so each distinct link is looked up once.
 *
 * @param string $link URL to check.
 * @return bool True when the link target is known to be missing.
 */
function isLinkBroken(string $link): bool
{
    static $linkCache = [];

    // Cached values are always booleans, never null, so ??= only evaluates
    // the lookup the first time a given link is seen.
    $linkCache[$link] ??= (linkTargetExists($link) === false);

    return $linkCache[$link];
}
92
/**
 * Collect the broken internal links found in an entity's HTML content.
 *
 * The HTML is read from the attribute named by the entity's htmlField property.
 *
 * @param Entity $entity Entity whose HTML should be scanned.
 * @return string[] Links for which isLinkBroken() returned true. Keys are preserved
 *                  from findLinksInContent(), so they may not be sequential.
 */
function findBrokenLinksInEntity(Entity $entity): array
{
    // findLinksInContent() only accepts a string, so a missing (null) attribute value
    // is treated as empty content instead of raising a TypeError part-way through a scan.
    $html = (string) ($entity->getAttribute($entity->htmlField) ?? '');

    $allLinks = findLinksInContent($html);

    return array_filter($allLinks, fn($link) => isLinkBroken($link));
}
98
/**
 * Scan every entity returned by the given query and build report lines for broken links.
 *
 * Entities are loaded in chunks of 500 and a progress line is echoed for each chunk.
 * Report lines are collected and returned instead of being printed straight away.
 *
 * @param Builder $baseQuery  Query selecting the entities to scan; it is cloned before use.
 * @param string  $entityType Singular label used in the output, e.g. "shelf" or "page".
 * @return string[] Report lines: a separator, a heading and one line per broken link
 *                  for each entity that has at least one broken link.
 */
function reportForEntityType(Builder $baseQuery, string $entityType): array
{
    // "shelf" has an irregular plural; the other labels used by this script just take an "s".
    $typePlural = $entityType === 'shelf' ? 'shelves' : "{$entityType}s";

    $total = $baseQuery->clone()->count();
    $current = 0;
    $count = 500;
    $logLines = [];

    $baseQuery->clone()->chunk($count, function ($entities) use ($count, &$current, $total, $typePlural, $entityType, &$logLines) {
        // Upper bound of this chunk for the progress display, capped at the total.
        $rangeEnd = min($count + $current, $total);
        echo "Checking {$typePlural} [{$current}-{$rangeEnd} of {$total}]\n";

        foreach ($entities as $entity) {
            $brokenLinks = findBrokenLinksInEntity($entity);
            if (count($brokenLinks) === 0) {
                continue;
            }

            $logLines[] = "------------------------------------------------------";
            $logLines[] = "Broken/Old URLs found in {$entityType} {$entity->getUrl()}";
            $logLines[] = "|";
            foreach ($brokenLinks as $brokenLink) {
                $logLines[] = "|- $brokenLink";
            }
        }

        $current = $rangeEnd;
    });

    return $logLines;
}
124
/**
 * Run the broken-link scan over shelves, books, chapters and pages, in that order,
 * then print a completion banner followed by the collected report lines.
 */
function findBrokenLinks(): void
{
    // Entity label => query selecting every entity of that type.
    $queriesByType = [
        'shelf' => Bookshelf::query(),
        'book' => Book::query(),
        'chapter' => Chapter::query(),
        'page' => Page::query(),
    ];

    $report = [];
    foreach ($queriesByType as $type => $query) {
        $report = array_merge($report, reportForEntityType($query, $type));
    }

    $banner = "\n=======================\nSearch Complete\n=======================\n\n";
    echo $banner;
    echo implode("\n", $report) . "\n";
}

// Start the scan as soon as the script is loaded.
findBrokenLinks();