]> BookStack Code Mirror - website/commitdiff
Added search system config files
authorDan Brown <redacted>
Thu, 27 Oct 2022 20:38:44 +0000 (21:38 +0100)
committerDan Brown <redacted>
Thu, 27 Oct 2022 20:38:44 +0000 (21:38 +0100)
readme.md
search/config.json [new file with mode: 0644]
search/docker-compose.yml [new file with mode: 0644]
search/scrape.sh [new file with mode: 0644]

index f546102312b4ae59992e810085c04e773d0a2c1f..91ab18b6b5519b998b03860c3721b2c2ff07c51f 100644 (file)
--- a/readme.md
+++ b/readme.md
@@ -16,3 +16,9 @@ The "extended" version of hugo must be used (Has support for certain features li
 The theme is custom made with snippets taken from the [hugo casper theme](https://github.com/vjeantet/hugo-theme-casper).
 
 SCSS is used for the styling. Install NPM dependencies via `npm install` or `yarn` then you can use `npm run build` to build the css and site once or `npm run dev` to watch for changes.
+
+
+### Search
+
+Search is handled via [Meilisearch](https://www.meilisearch.com/). A nightly scrape runs to index the site pages for search.
+This is all docker-based, and the configuration used can be found in the `search/` directory of this repo.
\ No newline at end of file
diff --git a/search/config.json b/search/config.json
new file mode 100644 (file)
index 0000000..951b870
--- /dev/null
@@ -0,0 +1,79 @@
+{
+    "index_uid": "docs",
+    "start_urls": [
+        {
+            "url": "https://www.bookstackapp.com/docs/",
+            "page_rank": 5,
+            "selectors_key": "docs"
+        },
+        {
+            "url": "https://www.bookstackapp.com/blog/",
+            "page_rank": 1,
+            "selectors_key": "blog"
+        },
+        "https://www.bookstackapp.com/"
+    ],
+    "sitemap_urls": [
+        "https://www.bookstackapp.com/sitemap.xml"
+    ],
+    "stop_urls": [
+        "https://www.bookstackapp.com/categories/",
+        "https://www.bookstackapp.com/tags/",
+        "https://www.bookstackapp.com/blog/page/"
+    ],
+    "selectors": {
+        "default": {
+            "lvl0": {
+                "selector": ".homepage h1",
+                "global": true,
+                "default_value": "Website"
+            },
+            "lvl1": {
+                "selector": ".homepage  h1",
+                "global": true,
+                "default_value": "Homepage"
+            },
+            "lvl2": ".homepage h2",
+            "lvl3": ".homepage h3",
+            "lvl4": ".homepage h4",
+            "text": ".homepage p"
+        },
+        "docs": {
+            "lvl0": {
+                "selector": ".docs-section-title",
+                "global": true,
+                "default_value": "Documentation"
+            },
+            "lvl1": {
+                "selector": "main h1",
+                "global": true,
+                "default_value": "Topic"
+            },
+            "lvl2": "main h2",
+            "lvl3": "main h3",
+            "lvl4": "main h4",
+            "text": "main > *"
+        },
+        "blog": {
+            "lvl0": {
+                "selector": "a[href=\"/blog\"]",
+                "global": true,
+                "default_value": "Blogpost"
+            },
+            "lvl1": {
+                "selector": "main h1",
+                "global": true,
+                "default_value": "Topic"
+            },
+            "lvl2": "main h2",
+            "lvl3": "main h3",
+            "lvl4": "main h4",
+            "text": "main > *"
+        }
+    },
+    "selectors_exclude": [
+        ".footer-content",
+        ".edit-link",
+        ".icon"
+    ]
+}
\ No newline at end of file
diff --git a/search/docker-compose.yml b/search/docker-compose.yml
new file mode 100644 (file)
index 0000000..5d98ce6
--- /dev/null
@@ -0,0 +1,28 @@
+# Meilisearch service that backs the website search.
+# search/scrape.sh fills the index and reaches this service through the
+# published port 7700 on the host.
+version: "3.3"
+services:
+  search:
+    image: getmeili/meilisearch:latest
+    restart: always
+    volumes:
+      # Bind mount of the ./data directory next to this file. It is not the
+      # named "data" volume declared at the bottom.
+      - ./data:/home/meili/data.ms
+    ports:
+      # Quoted so YAML always reads the mapping as a string.
+      - "7700:7700"
+    environment:
+      - MEILI_ENV=production
+      # "abc123" is only a placeholder default. Set MEILI_MASTER_KEY in the
+      # shell or in a .env file beside this file to supply the real key
+      # without committing it.
+      # NOTE(review): newer Meilisearch releases are understood to reject
+      # short master keys in production mode - confirm before deploying.
+      - MEILI_MASTER_KEY=${MEILI_MASTER_KEY:-abc123}
+volumes:
+  # NOTE(review): no service in this file mounts this named volume; the
+  # service above uses a bind mount instead. Kept for compatibility.
+  data:
+    driver: local
\ No newline at end of file
diff --git a/search/scrape.sh b/search/scrape.sh
new file mode 100644 (file)
index 0000000..bae0da4
--- /dev/null
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Starts a one-off docs-scraper container that crawls the site as described
+# by config.json and sends the result to the Meilisearch instance, so the
+# search index is updated.
+# This script is typically run via cron.
+# Cron example:
+# 2 3 * * * /var/www/meilisearch/scrape.sh >> /dev/null 2>&1
+#
+# Optional environment overrides:
+#   MEILISEARCH_HOST_URL  Address of the Meilisearch instance.
+#   MEILISEARCH_API_KEY   Key sent to Meilisearch; 'def456' is a placeholder.
+#   SCRAPER_CONFIG        Absolute host path of the scraper config file.
+
+host_url="${MEILISEARCH_HOST_URL:-http://localhost:7700}"
+api_key="${MEILISEARCH_API_KEY:-def456}"
+config_file="${SCRAPER_CONFIG:-/var/www/meilisearch/config.json}"
+
+# --network=host lets "localhost" inside the container reach the port that
+# the Meilisearch container publishes on the host.
+docker run -t --rm \
+  --network=host \
+  -e MEILISEARCH_HOST_URL="$host_url" \
+  -e MEILISEARCH_API_KEY="$api_key" \
+  -v "$config_file":/docs-scraper/config.json \
+  getmeili/docs-scraper:latest pipenv run ./docs_scraper config.json
\ No newline at end of file