Add pg_read_binary_file() and whole-file-at-once versions of pg_read_file().
author Itagaki Takahiro <[email protected]>
Wed, 15 Dec 2010 21:56:28 +0000 (06:56 +0900)
committer Itagaki Takahiro <[email protected]>
Wed, 15 Dec 2010 21:56:28 +0000 (06:56 +0900)
One use of the binary version is to read files whose encoding differs
from the server encoding.

Dimitri Fontaine and Itagaki Takahiro.

doc/src/sgml/func.sgml
src/backend/utils/adt/genfile.c
src/include/catalog/catversion.h
src/include/catalog/pg_proc.h
src/include/utils/builtins.h

index 21f1ddfa506aa8f8194bafc75c0c580b18333e08..7c1ba9d07f2fa7e523216e7382da8386602ad3c8 100644 (file)
@@ -14449,11 +14449,18 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
       </row>
       <row>
        <entry>
-        <literal><function>pg_read_file(<parameter>filename</> <type>text</>, <parameter>offset</> <type>bigint</>, <parameter>length</> <type>bigint</>)</function></literal>
+        <literal><function>pg_read_file(<parameter>filename</> <type>text</> [, <parameter>offset</> <type>bigint</>, <parameter>length</> <type>bigint</>])</function></literal>
        </entry>
        <entry><type>text</type></entry>
        <entry>Return the contents of a text file</entry>
       </row>
+      <row>
+       <entry>
+        <literal><function>pg_read_binary_file(<parameter>filename</> <type>text</> [, <parameter>offset</> <type>bigint</>, <parameter>length</> <type>bigint</>])</function></literal>
+       </entry>
+       <entry><type>bytea</type></entry>
+       <entry>Return the contents of a file</entry>
+      </row>
       <row>
        <entry>
         <literal><function>pg_stat_file(<parameter>filename</> <type>text</>)</function></literal>
@@ -14482,6 +14489,22 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
     at the given <parameter>offset</>, returning at most <parameter>length</>
     bytes (less if the end of file is reached first).  If <parameter>offset</>
     is negative, it is relative to the end of the file.
+    When the <parameter>offset</> and <parameter>length</> parameters are
+    omitted, it returns the whole file.
+    The bytes read from the file must be valid text in the server encoding.
+   </para>
+
+   <indexterm>
+    <primary>pg_read_binary_file</primary>
+   </indexterm>
+   <para>
+    <function>pg_read_binary_file</> returns part of a file just as
+    <function>pg_read_file</> does, but the result is a bytea value.
+    One use is to read a file in a specified encoding, in combination with
+    the <function>convert_from</> function:
+<programlisting>
+SELECT convert_from(pg_read_binary_file('file_in_utf8.txt'), 'UTF8');
+</programlisting>
    </para>
 
    <indexterm>
index e8a36edcd4dc6182e21cfd9f4cf7bb0f7022f289..e9212500c231c4839de40cf74913477b679a0710 100644 (file)
@@ -80,15 +80,14 @@ convert_and_check_filename(text *arg)
 
 
 /*
- * Read a section of a file, returning it as text
+ * Read a section of a file, returning it as bytea
+ *
+ * We read the whole of the file when bytes_to_read is negative.
  */
-Datum
-pg_read_file(PG_FUNCTION_ARGS)
+static bytea *
+read_binary_file(text *filename_t, int64 seek_offset, int64 bytes_to_read)
 {
-   text       *filename_t = PG_GETARG_TEXT_P(0);
-   int64       seek_offset = PG_GETARG_INT64(1);
-   int64       bytes_to_read = PG_GETARG_INT64(2);
-   char       *buf;
+   bytea      *buf;
    size_t      nbytes;
    FILE       *file;
    char       *filename;
@@ -100,6 +99,29 @@ pg_read_file(PG_FUNCTION_ARGS)
 
    filename = convert_and_check_filename(filename_t);
 
+   if (bytes_to_read < 0)
+   {
+       if (seek_offset < 0)
+           bytes_to_read = -seek_offset;
+       else
+       {
+           struct stat fst;
+
+           if (stat(filename, &fst) < 0)
+               ereport(ERROR,
+                       (errcode_for_file_access(),
+                        errmsg("could not stat file \"%s\": %m", filename)));
+
+           bytes_to_read = fst.st_size - seek_offset;
+       }
+   }
+
+   /* not sure why anyone thought that int64 length was a good idea */
+   if (bytes_to_read > (MaxAllocSize - VARHDRSZ))
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                errmsg("requested length too large")));
+
    if ((file = AllocateFile(filename, PG_BINARY_R)) == NULL)
        ereport(ERROR,
                (errcode_for_file_access(),
@@ -112,18 +134,7 @@ pg_read_file(PG_FUNCTION_ARGS)
                (errcode_for_file_access(),
                 errmsg("could not seek in file \"%s\": %m", filename)));
 
-   if (bytes_to_read < 0)
-       ereport(ERROR,
-               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                errmsg("requested length cannot be negative")));
-
-   /* not sure why anyone thought that int64 length was a good idea */
-   if (bytes_to_read > (MaxAllocSize - VARHDRSZ))
-       ereport(ERROR,
-               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                errmsg("requested length too large")));
-
-   buf = palloc((Size) bytes_to_read + VARHDRSZ);
+   buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ);
 
    nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file);
 
@@ -132,15 +143,86 @@ pg_read_file(PG_FUNCTION_ARGS)
                (errcode_for_file_access(),
                 errmsg("could not read file \"%s\": %m", filename)));
 
-   /* Make sure the input is valid */
-   pg_verifymbstr(VARDATA(buf), nbytes, false);
-
    SET_VARSIZE(buf, nbytes + VARHDRSZ);
 
    FreeFile(file);
    pfree(filename);
 
-   PG_RETURN_TEXT_P(buf);
+   return buf;
+}
+
+/*
+ * Like read_binary_file, but also verify that the contents are validly
+ * encoded in the database encoding.
+ */
+static text *
+read_text_file(text *filename, int64 seek_offset, int64 bytes_to_read)
+{
+   bytea *buf = read_binary_file(filename, seek_offset, bytes_to_read);
+
+   /* Make sure the input is valid */
+   pg_verifymbstr(VARDATA(buf), VARSIZE(buf) - VARHDRSZ, false);
+
+   /* OK, we can cast it as text safely */
+   return (text *) buf;
+}
+
+/*
+ * Read a section of a file, returning it as text
+ */
+Datum
+pg_read_file(PG_FUNCTION_ARGS)
+{
+   text       *filename_t = PG_GETARG_TEXT_P(0);
+   int64       seek_offset = PG_GETARG_INT64(1);
+   int64       bytes_to_read = PG_GETARG_INT64(2);
+
+   if (bytes_to_read < 0)
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                errmsg("requested length cannot be negative")));
+
+   PG_RETURN_TEXT_P(read_text_file(filename_t, seek_offset, bytes_to_read));
+}
+
+/*
+ * Read the whole of a file, returning it as text
+ */
+Datum
+pg_read_file_all(PG_FUNCTION_ARGS)
+{
+   text       *filename_t = PG_GETARG_TEXT_P(0);
+
+   PG_RETURN_TEXT_P(read_text_file(filename_t, 0, -1));
+}
+
+/*
+ * Read a section of a file, returning it as bytea
+ */
+Datum
+pg_read_binary_file(PG_FUNCTION_ARGS)
+{
+   text       *filename_t = PG_GETARG_TEXT_P(0);
+   int64       seek_offset = PG_GETARG_INT64(1);
+   int64       bytes_to_read = PG_GETARG_INT64(2);
+
+   if (bytes_to_read < 0)
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                errmsg("requested length cannot be negative")));
+
+   PG_RETURN_BYTEA_P(read_binary_file(filename_t, seek_offset, bytes_to_read));
+}
+
+/*
+ * Read the whole of a file, returning it as bytea
+ */
+Datum
+pg_read_binary_file_all(PG_FUNCTION_ARGS)
+{
+   text       *filename_t = PG_GETARG_TEXT_P(0);
+
+   PG_RETURN_BYTEA_P(read_binary_file(filename_t, 0, -1));
 }
 
 /*
index 1c3d14951c0d38a109cec893c779b76fbae2849c..1ebd7a2d27a44f4e6b8f1e5180eef4798ba7b704 100644 (file)
@@ -53,6 +53,6 @@
  */
 
 /*                         yyyymmddN */
-#define CATALOG_VERSION_NO 201012131
+#define CATALOG_VERSION_NO 201012161
 
 #endif
index feae22e896fc060aebf857e274bcb1339fa44be3..1e6e75f5c3979e34756bfa06041dea2a189d9278 100644 (file)
@@ -3403,6 +3403,12 @@ DATA(insert OID = 2623 ( pg_stat_file        PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2249
 DESCR("return file information");
 DATA(insert OID = 2624 ( pg_read_file      PGNSP PGUID 12 1 0 0 f f f t f v 3 0 25 "25 20 20" _null_ _null_ _null_ _null_ pg_read_file _null_ _null_ _null_ ));
 DESCR("read text from a file");
+DATA(insert OID = 3826 ( pg_read_file      PGNSP PGUID 12 1 0 0 f f f t f v 1 0 25 "25" _null_ _null_ _null_ _null_ pg_read_file_all _null_ _null_ _null_ ));
+DESCR("read text from a file");
+DATA(insert OID = 3827 ( pg_read_binary_file   PGNSP PGUID 12 1 0 0 f f f t f v 3 0 17 "25 20 20" _null_ _null_ _null_ _null_ pg_read_binary_file _null_ _null_ _null_ ));
+DESCR("read bytea from a file");
+DATA(insert OID = 3828 ( pg_read_binary_file   PGNSP PGUID 12 1 0 0 f f f t f v 1 0 17 "25" _null_ _null_ _null_ _null_ pg_read_binary_file_all _null_ _null_ _null_ ));
+DESCR("read bytea from a file");
 DATA(insert OID = 2625 ( pg_ls_dir         PGNSP PGUID 12 1 1000 0 f f f t t v 1 0 25 "25" _null_ _null_ _null_ _null_ pg_ls_dir _null_ _null_ _null_ ));
 DESCR("list all files in a directory");
 DATA(insert OID = 2626 ( pg_sleep          PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2278 "701" _null_ _null_ _null_ _null_ pg_sleep _null_ _null_ _null_ ));
index a2fb7494cb4cb28ec9fc21fc34762ec989778151..1888e312f8cf7bd79d461ba9946734a2fa01696b 100644 (file)
@@ -442,6 +442,9 @@ extern Datum pg_relation_filepath(PG_FUNCTION_ARGS);
 /* genfile.c */
 extern Datum pg_stat_file(PG_FUNCTION_ARGS);
 extern Datum pg_read_file(PG_FUNCTION_ARGS);
+extern Datum pg_read_file_all(PG_FUNCTION_ARGS);
+extern Datum pg_read_binary_file(PG_FUNCTION_ARGS);
+extern Datum pg_read_binary_file_all(PG_FUNCTION_ARGS);
 extern Datum pg_ls_dir(PG_FUNCTION_ARGS);
 
 /* misc.c */