RelationalAI · gbrgr · Jun 9, 2022 · Oct 11, 2022 · Oct 11, 2022 · Oct 11, 2022
diff --git a/examples/load_csv.py b/examples/load_csv.py
@@ -31,12 +31,17 @@ def _sansext(fname: str) -> str:
 
 
 def run(database: str, engine: str, fname: str, relation: str,
-        syntax: dict, profile: str):
+        syntax: dict, schema: dict, profile: str):
+    """Load the CSV file `fname` into `relation` and print the response.
+
+    `relation` falls back to `_sansext(fname)` when empty; `syntax` and
+    `schema` are passed through unchanged to `api.load_csv`.
+    """
     data = _read(fname)
     relation = relation or _sansext(fname)
     cfg = config.read(profile=profile)
     ctx = api.Context(**cfg)
-    rsp = api.load_csv(ctx, database, engine, relation, data, syntax)
+    rsp = api.load_csv(ctx, database, engine, relation, data, syntax, schema)
     print(json.dumps(rsp, indent=2))
 
 
@@ -57,6 +62,10 @@ def run(database: str, engine: str, fname: str, relation: str,
                    help="relation name (default: file name)")
     p.add_argument("-p", "--profile", type=str, default="default",
                    help="profile name")
+    p.add_argument("--schema", type=str, default="",
+                   help="Comma separated list of expressions `col=type` "
+                        "specifying that `col` has Rel type `type`.")
+
     args = p.parse_args()
     syntax = {}  # find full list of syntax options in the RAI docs
     if args.header_row is not None:
@@ -67,8 +76,20 @@ def run(database: str, engine: str, fname: str, relation: str,
         syntax["escapechar"] = args.escapechar
     if args.quotechar:
         syntax["quotechar"] = args.quotechar
+
+    # Parse "col=type,col=type" into {col: type}. The default "" (and any
+    # stray comma) is skipped instead of crashing the tuple unpacking.
+    schema = {}
+    for pair in args.schema.split(","):
+        if not pair.strip():
+            continue
+        col, sep, rel_type = pair.partition("=")
+        if not sep:
+            p.error(f"bad --schema entry {pair!r}: expected col=type")
+        schema[col.strip()] = rel_type.strip()
+
     try:
         run(args.database, args.engine, args.file,
-            args.relation, syntax, args.profile)
+            args.relation, syntax, schema, args.profile)
     except HTTPError as e:
         show.http_error(e)
diff --git a/railib/api.py b/railib/api.py
@@ -612,27 +612,58 @@ def _gen_syntax_config(syntax: dict = {}) -> str:
     return result
 
 
-# `syntax`:
-#   * header: a map from col number to name (base 1)
-#   * header_row: row number of header, 0 means no header (default: 1)
-#   * delim: default: ,
-#   * quotechar: default: "
-#   * escapechar: default: \
-#
-# Schema: a map from col name to rel type name, eg:
-#   {'a': "int", 'b': "string"}
 def load_csv(ctx: Context, database: str, engine: str, relation: str,
-             data: str or io.TextIOBase, syntax: dict = {}) -> dict:
+             data: str or io.TextIOBase, syntax: dict = {},
+             schema: dict = {}) -> dict:
+    """
+    Load CSV data from `data` into `database` using `engine`.
+
+    Upon success, the parsed CSV data is stored in `relation`.
+
+    Args:
+        - `ctx` (`Context`): The RAI API context.
+        - `database` (`str`): The target database name.
+        - `engine` (`str`): The engine used for loading.
+        - `relation` (`str`): Relation name used to store the CSV data.
+        - `data` (`str or io.TextIOBase`): Data given either as a string
+          or as a stream of type `io.TextIOBase`.
+        - `syntax` (`dict`, optional): Parsing configuration. Defaults to
+          `{}`. Valid entries are:
+            - `header`: A dictionary mapping column numbers (base 1) to
+              names.
+            - `header_row`: The row number of the header row; 0 means no
+              header. Defaults to `1`.
+            - `delim`: Column delimiter used. Defaults to `,`.
+            - `quotechar`: Quotation character used. Defaults to `"`.
+            - `escapechar`: Escape character used. Defaults to `\\`.
+        - `schema` (`dict`, optional): Dictionary mapping column names to
+          Rel type names, e.g. `{'a': "int", 'b': "string"}`. Defaults
+          to `{}`.
+
+    Raises:
+        `TypeError`: If `data` is neither `str` nor `io.TextIOBase`.
+
+    Returns:
+        `dict`: The response of the query action.
+    """
     if isinstance(data, str):
         pass  # ok
     elif isinstance(data, io.TextIOBase):
         data = data.read()
     else:
         raise TypeError(f"bad type for arg 'data': {data.__class__.__name__}")
+
     inputs = {'data': data}
     command = _gen_syntax_config(syntax)
+    # NOTE(review): column and type names are interpolated verbatim (no
+    # escaping) -- confirm callers never pass `"` or `\` in them.
+    command += "".join(
+        f'def config:schema[:"{col}"] = "{rel_type}"\n'
+        for col, rel_type in schema.items()
+    )
     command += ("def config:data = data\n"
-                "def insert:%s = load_csv[config]" % relation)
+                f"def insert[:{relation}] = load_csv[config]")
+
     return query(ctx, database, engine, command, inputs=inputs, readonly=False)