// Copyright (C) 2025 The Qt Company Ltd. // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 using System; using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; namespace QtVsTools.SyntaxAnalysis { public abstract partial class RegExpr { //////////////////////////////////////////////////////////////////////////////////////////// /// /// RegExpr.Parser /// //////////////////////////////////////////////////////////////////////////////////////////// /// /// Rendering of /// public partial class Parser { Renderer Renderer { get; } Pattern Pattern { get; set; } public Regex Regex { get; private set; } internal Parser(RegExpr expr, RegExpr defaultTokenWs = null) { Renderer = new Renderer(); Refresh(expr, defaultTokenWs); } /// /// Parse input text and return productions. /// /// /// The parsing procedure will first calculate the parse tree corresponding to the input /// text, given the token data captured. The parse tree is then used to generate all /// productions, according to the production rules defined for each token. /// (see also ) /// /// Text to be parsed. /// Productions by token id public ProductionObjects Parse(string text) { var parseTree = GetParseTree(text); return GetProductionObjects(parseTree); } public void Refresh(RegExpr expr, RegExpr defaultTokenWs = null) { // Render Regex string Pattern = Renderer.RenderPattern(expr, defaultTokenWs); // Compile Regex Regex = new Regex(Pattern.ExprRender, RegexOptions.Multiline); } /// /// Parse input text using Regex and generate corresponding parse tree. /// /// Text to be parsed /// Parse tree ParseTree GetParseTree(string text) { // Match regex pattern var nodes = new List(); var matches = Regex.Matches(text); if (matches.Count == 0) throw new ParseErrorException(); foreach (Match match in matches) { if (!match.Success || match.Length == 0) { if (nodes.Any()) continue; throw new ParseErrorException(); } // Flat list of parse-tree nodes, from Regex captures var matchNodes = match.Groups.Cast() .SelectMany((group, groupIdx) => group.Captures.Cast() .Where(capture => !string.IsNullOrEmpty(capture.Value)) .Select((capture, captureIdx) => new ParseTree.Node { CaptureId = Regex.GroupNameFromNumber(groupIdx), Token = Pattern.Tokens[Regex.GroupNameFromNumber(groupIdx)], Value = capture.Value, Begin = capture.Index, End = capture.Index + capture.Length, GroupIdx = groupIdx, CaptureIdx = captureIdx })) .OrderBy(c => c.Begin) .ToList(); nodes.AddRange(matchNodes); } // Node list partitioned by token var nodesByToken = nodes .GroupBy(node => node.Token) .ToDictionary(g => g.Key, g => g.ToArray()); foreach (var node in nodes.Where(n => n.Token != Pattern.Root)) { // Get nodes captured by parent token if (!node.Token.Parents.TryGetValue(node.CaptureId, out Token parentToken)) throw new ParseErrorException("Unknown capture ID"); if (!nodesByToken.TryGetValue(parentToken, out ParseTree.Node[] parentNodes)) throw new ParseErrorException("Missing parent nodes"); // Find parent node int idx = Array.BinarySearch(parentNodes, node, ParseTree.Node.Comparer); if (idx < 0) { idx = (~idx) - 1; if (idx < 0) throw new ParseErrorException("Parent node not found"); } // Attach to parent node (node.Parent = parentNodes[idx]).ChildNodes.Add(node.Begin, node); } var topNodes = nodesByToken[Pattern.Root]; if (topNodes.Length == 1) return topNodes[0]; var root = new ParseTree.Node { CaptureId = string.Empty, Token = null, Value = text, Begin = 0, End = text.Length, GroupIdx = -1, CaptureIdx = -1 }; foreach (var node in nodesByToken[Pattern.Root]) (node.Parent = root).ChildNodes.Add(node.Begin, node); return root; } } public class ParseErrorException : RegExprException { public ParseErrorException(string message = null) : base(message) { } } } }