18  Intro to DuckDB

using DuckDB
using DBInterface
import DBInterface: execute as exec
import DBInterface: prepare as prep
con = DBInterface.connect(DuckDB.DB, ":memory:")
DuckDB.DB(":memory:")
DBInterface.execute(con,
    """
    CREATE TABLE ir AS SELECT * FROM
    read_csv_auto('/Users/egenn/icloud/Data/iris.csv', 
        delim=',', header=True);
    """)
(Count = Union{Missing, Int64}[150],)
exec(con, "SHOW TABLES")
(name = Union{Missing, String}["ir"],)
exec(con, "DESCRIBE ir")
(column_name = Union{Missing, String}["Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", "Species"], column_type = Union{Missing, String}["DOUBLE", "DOUBLE", "DOUBLE", "DOUBLE", "VARCHAR"], null = Union{Missing, String}["YES", "YES", "YES", "YES", "YES"], key = Union{Missing, String}[missing, missing, missing, missing, missing], default = Union{Missing, String}[missing, missing, missing, missing, missing], extra = Union{Missing, String}[missing, missing, missing, missing, missing])
exec(con,
    """
    SELECT * FROM ir
    """
)
(var"Sepal.Length" = Union{Missing, Float64}[5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9  …  6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9], var"Sepal.Width" = Union{Missing, Float64}[3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1  …  3.1, 3.1, 2.7, 3.2, 3.3, 3.0, 2.5, 3.0, 3.4, 3.0], var"Petal.Length" = Union{Missing, Float64}[1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5  …  5.6, 5.1, 5.1, 5.9, 5.7, 5.2, 5.0, 5.2, 5.4, 5.1], var"Petal.Width" = Union{Missing, Float64}[0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1  …  2.4, 2.3, 1.9, 2.3, 2.5, 2.3, 1.9, 2.0, 2.3, 1.8], Species = Union{Missing, String}["setosa", "setosa", "setosa", "setosa", "setosa", "setosa", "setosa", "setosa", "setosa", "setosa"  …  "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica"])

18.1 Prepare statements

q = prep(con,
    """
    CREATE TABLE iris AS SELECT * FROM
    read_csv_auto('/Users/egenn/icloud/Data/iris.csv', 
        delim=',', header=True);
    """)
DuckDB.Stmt(DuckDB.Connection(":memory:"), Ptr{Nothing} @0x000000011f7503f0, "CREATE TABLE iris AS SELECT * FROM\nread_csv_auto('/Users/egenn/icloud/Data/iris.csv', \n    delim=',', header=True);\n", DuckDB.MaterializedResult)
exec(q)
(Count = Union{Missing, Int64}[150],)
iris = exec(con, "SELECT * FROM iris")
typeof(iris)
DuckDB.QueryResult
iris
(var"Sepal.Length" = Union{Missing, Float64}[5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9  …  6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9], var"Sepal.Width" = Union{Missing, Float64}[3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1  …  3.1, 3.1, 2.7, 3.2, 3.3, 3.0, 2.5, 3.0, 3.4, 3.0], var"Petal.Length" = Union{Missing, Float64}[1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5  …  5.6, 5.1, 5.1, 5.9, 5.7, 5.2, 5.0, 5.2, 5.4, 5.1], var"Petal.Width" = Union{Missing, Float64}[0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1  …  2.4, 2.3, 1.9, 2.3, 2.5, 2.3, 1.9, 2.0, 2.3, 1.8], Species = Union{Missing, String}["setosa", "setosa", "setosa", "setosa", "setosa", "setosa", "setosa", "setosa", "setosa", "setosa"  …  "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica"])

18.2 Filter

ids = tuple(21, 22, 23, 24, 25)
exec(con,
    """
    SELECT * FROM read_csv_auto('/Users/egenn/icloud/Data/synth1.csv')
    WHERE ID in $ids;
    """
)
(ID = Union{Missing, Int64}[21, 22, 23, 24, 25], Age = Union{Missing, Float64}[43.6446078379911, 43.8365100056002, 51.1340571729247, 52.1150978723465, 36.516745383723], Group = Union{Missing, String}["A", "A", "C", "B", "C"])

18.3 Resources