Add a lookahead mechanism for reading beyond the current input line.
author: Tom Lane <[email protected]>
Sat, 18 May 2019 22:21:12 +0000 (18:21 -0400)
committer: Tom Lane <[email protected]>
Sat, 18 May 2019 22:21:12 +0000 (18:21 -0400)
It's truly remarkable that indent has survived this long without
growing such a capability.  (The bp_save stuff seems to be a sort
of half-baked version of it, with restricted functionality and
limited buffer size.)

indent.h
io.c

index 0fffd89fbc942cb471267331559d1bbf8ebd2f9b..1708dbc19f9d56277a98a22d989abe9aaf94acc9 100644 (file)
--- a/indent.h
+++ b/indent.h
@@ -41,6 +41,8 @@ void  diag2(int, const char *);
 void   diag3(int, const char *, int);
 void   diag4(int, const char *, int, int);
 void   dump_line(void);
+int    lookahead(void);
+void   lookahead_reset(void);
 void   fill_buffer(void);
 void   parse(int);
 void   pr_comment(void);
diff --git a/io.c b/io.c
index df110947ffd0debd30b4a29055578af7fe5b82bf..fbaa5dd6817c0d1f4d6d04104c1af1712d67c736 100644 (file)
--- a/io.c
+++ b/io.c
@@ -51,6 +51,14 @@ static char sccsid[] = "@(#)io.c     8.1 (Berkeley) 6/6/93";
 
 int         comment_open;
 static int  paren_target;
+
+static char *lookahead_buf;    /* malloc'd buffer (grown by doubling), or NULL until first use */
+static char *lookahead_buf_end;        /* end+1 of allocated space */
+static char *lookahead_start;  /* => next buffered char for fill_buffer() to fetch */
+static char *lookahead_ptr;    /* => next buffered char for lookahead() to fetch */
+static char *lookahead_end;    /* last+1 valid char in lookahead_buf */
+static char *lookahead_bp_save;        /* => next char for lookahead() in bp_save area, or NULL */
+
 static int pad_output(int current, int target);
 
 void
@@ -252,6 +260,73 @@ compute_label_target(void)
        : ps.ind_size * (ps.ind_level - label_offset) + 1;
 }
 
+/*
+ * Read data ahead of what has been collected into in_buffer.
+ * Returns the next character as a non-negative int, or EOF if getc() does.
+ *
+ * Successive calls get further and further ahead, until we hit EOF.
+ * Call lookahead_reset() to rescan from just beyond in_buffer.
+ * Lookahead is automatically reset whenever fill_buffer() reads beyond
+ * the lookahead buffer, i.e., you can't use this for "look behind".
+ *
+ * The standard pattern for potentially multi-line lookahead is to call
+ * lookahead_reset(), then enter a loop that scans forward from buf_ptr
+ * to buf_end, then (if necessary) calls lookahead() to read additional
+ * characters from beyond the end of the current line.
+ */
+int
+lookahead(void)
+{
+    /* First read whatever's in bp_save area */
+    if (lookahead_bp_save != NULL && lookahead_bp_save < be_save)
+       return (unsigned char) *lookahead_bp_save++;
+    /* Else, we have to examine and probably fill the main lookahead buffer */
+    while (lookahead_ptr >= lookahead_end) {   /* nothing buffered is unread */
+       int         i = getc(input);
+
+       if (i == EOF)
+           return i;           /* EOF is returned without being buffered */
+       if (i == '\0')
+           continue;           /* fill_buffer drops nulls, and so do we */
+
+       if (lookahead_end >= lookahead_buf_end) {
+           /* Allocate or enlarge lookahead_buf, then re-base the cursors */
+           char       *new_buf;
+           size_t      req;
+
+           if (lookahead_buf == NULL) {
+               req = 64;       /* initial capacity, in bytes */
+               new_buf = malloc(req);
+           } else {
+               req = (lookahead_buf_end - lookahead_buf) * 2;  /* double it */
+               new_buf = realloc(lookahead_buf, req);
+           }
+           if (new_buf == NULL)
+               errx(1, "too much lookahead required");
+           lookahead_start = new_buf + (lookahead_start - lookahead_buf);
+           lookahead_ptr = new_buf + (lookahead_ptr - lookahead_buf);
+           lookahead_end = new_buf + (lookahead_end - lookahead_buf);
+           lookahead_buf = new_buf;
+           lookahead_buf_end = new_buf + req;
+       }
+
+       *lookahead_end++ = i;   /* append the new character */
+    }
+    return (unsigned char) *lookahead_ptr++;
+}
+
+/*
+ * Reset so that lookahead() will again scan from just beyond what's in
+ * in_buffer; characters already buffered are kept and will be re-read.
+ */
+void
+lookahead_reset(void)
+{
+    /* Reset the main lookahead buffer */
+    lookahead_ptr = lookahead_start;
+    /* If bp_save isn't NULL, we need to scan that first */
+    lookahead_bp_save = bp_save;       /* NULL makes lookahead() skip it */
+}
 
 /*
  * Copyright (C) 1976 by the Board of Trustees of the University of Illinois
@@ -261,7 +336,9 @@ compute_label_target(void)
  *
  * NAME: fill_buffer
  *
- * FUNCTION: Reads one block of input into input_buffer
+ * FUNCTION: Reads one line of input into in_buffer,
+ * sets up buf_ptr and buf_end to point to the line's start and end+1.
+ * (Note that the buffer does not get null-terminated.)
  *
  * HISTORY: initial coding     November 1976   D A Willcox of CAC 1/7/77 A
  * Willcox of CAC      Added check for switch back to partly full input
@@ -279,6 +356,7 @@ fill_buffer(void)
        buf_ptr = bp_save;      /* do not read anything, just switch buffers */
        buf_end = be_save;
        bp_save = be_save = NULL;
+       lookahead_bp_save = NULL;
        if (buf_ptr < buf_end)
            return;             /* only return if there is really something in
                                 * this buffer */
@@ -293,16 +371,21 @@ fill_buffer(void)
            p = in_buffer + offset;
            in_buffer_limit = in_buffer + size - 2;
        }
-       if ((i = getc(f)) == EOF) {
+       if (lookahead_start < lookahead_end) {
+           i = (unsigned char) *lookahead_start++;
+       } else {
+           lookahead_start = lookahead_ptr = lookahead_end = lookahead_buf;
+           if ((i = getc(f)) == EOF) {
                *p++ = ' ';
                *p++ = '\n';
                had_eof = true;
                break;
+           }
        }
        if (i != '\0')
            *p++ = i;
        if (i == '\n')
-               break;
+           break;
     }
     buf_ptr = in_buffer;
     buf_end = p;