Mix.install([
{:explorer, "~> 0.6.0"},
{:kino, "~> 0.9.0"},
{:vega_lite, "~> 0.1.6"},
{:kino_vega_lite, "~> 0.1.9"},
{:kino_explorer, "~> 0.1.4"}
])
alias VegaLite, as: Vl
alias Explorer.DataFrame, as: DF
alias Explorer.Series, as: Series
# {:kino_explorer, "~> 0.1.8"},
Load the dataframe (much like in Pandas, except that `load_csv!` accepts the CSV contents as a string rather than a file path).
# Read the file into a string first: `load_csv!` is handed CSV contents, not a path.
happy_df =
  "happydata.csv"
  |> File.read!()
  |> DF.load_csv!()
Show summary statistics for each column of the dataframe.
# Summarise `happy_df` with Explorer's `DataFrame.describe/1`; as a bare
# expression, its result is the value of this cell.
DF.describe(happy_df)
# Point chart of "happy" (y) against "housecost" (x); only those two columns
# are embedded in the chart data.
Vl.new(title: "happy")
|> Vl.data_from_values(happy_df, only: ~w(housecost happy))
|> Vl.mark(:point)
|> Vl.encode_field(:x, "housecost", [type: :quantitative])
|> Vl.encode_field(:y, "happy", [type: :quantitative])
# `require` makes the macros of Explorer.DataFrame available from here on.
# NOTE(review): no DataFrame macro is called in the visible code — confirm it is still needed.
require Explorer.DataFrame
# Bare expression: evaluates to the dataframe (presumably so the notebook displays it).
happy_df
`defp` defines a private function; the private helpers below can be skipped on a first reading.
defmodule Plot do
  @moduledoc """
  Builds VegaLite charts for a pair of dataframe columns.

  `plot/4` is the only public entry point; the `defp` helpers below build the
  individual chart kinds and the parts they share.
  """

  # Both charts are drawn as a square with this many pixels per side.
  @size 630

  @doc """
  Builds a VegaLite chart of `col2` (y axis) against `col1` (x axis).

  ## Parameters

    * `df` - the dataframe; it is indexed as `df[col]` and passed to
      `Vl.data_from_values/3`
    * `col1`, `col2` - names of the columns to plot
    * `plot_type` - `:scatterplot` or `:boxplot`

  Raises `CaseClauseError` for any other `plot_type`.
  """
  def plot(df, col1, col2, plot_type) do
    case plot_type do
      :scatterplot -> scatterplot(df, col1, col2)
      :boxplot -> boxplot(df, col1, col2)
    end
  end

  # Returns `[min, max]` of the series. The order matters: a continuous scale
  # domain given as `[max, min]` draws the axis reversed.
  defp get_range(srs) do
    [Series.min(srs), Series.max(srs)]
  end

  # Circle marks with both axes quantitative, each spanning exactly the
  # observed range of its column.
  defp scatterplot(df, col1, col2) do
    df
    |> new_chart("Scatterplot of Generated Data", col1, col2)
    |> Vl.mark(:circle)
    |> Vl.encode_field(:x, col1,
      type: :quantitative,
      scale: [domain: get_range(df[col1])],
      axis: [grid: false]
    )
    |> encode_y(df, col2)
  end

  # One box per distinct value of `col1`, summarising `col2`.
  defp boxplot(df, col1, col2) do
    df
    |> new_chart("Boxplot of Generated Data", col1, col2)
    |> Vl.mark(:boxplot)
    # No explicit domain on the ordinal axis: a discrete scale treats the
    # domain as the complete list of categories, so a two-element
    # `[min, max]` range would leave every category in between unplaced.
    |> Vl.encode_field(:x, col1,
      type: :ordinal,
      axis: [grid: false]
    )
    |> encode_y(df, col2)
  end

  # Shared chart skeleton: title, square canvas, and the data restricted to the
  # plotted columns (`Enum.uniq/1` covers the case `col1 == col2`).
  defp new_chart(df, title_text, col1, col2) do
    Vl.new(
      title: [
        text: title_text,
        offset: 20
      ],
      width: @size,
      height: @size
    )
    |> Vl.data_from_values(df, only: Enum.uniq([col1, col2]))
  end

  # Shared quantitative y encoding, bounded by the observed range of `col`.
  defp encode_y(vl, df, col) do
    Vl.encode_field(vl, :y, col,
      type: :quantitative,
      scale: [domain: get_range(df[col])],
      axis: [grid: false]
    )
  end
end
defmodule PlotHappy do
  @moduledoc """
  Interactive front end for `Plot`: renders two column selectors and plots the
  columns currently chosen in them.
  """

  @doc """
  Renders two select inputs, labelled "col1" and "col2", each listing every
  column name of `df`, and returns them as `[col1, col2]`.
  """
  def plot_inputs(df) do
    column_tuples = columns_as_tuples(df)

    for label <- ["col1", "col2"] do
      label
      |> Kino.Input.select(column_tuples)
      |> Kino.render()
    end
  end

  @doc """
  Plots the columns currently selected in the `[col1, col2]` inputs returned by
  `plot_inputs/1`.

  A `:boxplot` is drawn when the first column's dtype is `:integer`; any other
  dtype gives a `:scatterplot`.
  """
  def plot(df, [col1, col2]) do
    # Read each input exactly once so that the chart-type decision and the
    # plotted column always refer to the same selection.
    x_col = Kino.Input.read(col1)
    y_col = Kino.Input.read(col2)

    plot_type =
      case Series.dtype(df[x_col]) do
        :integer -> :boxplot
        _ -> :scatterplot
      end

    Plot.plot(df, x_col, y_col, plot_type)
  end

  # Select options are `{value, label}` pairs; each column name serves as both.
  defp columns_as_tuples(df) do
    for n <- df.names, do: {n, n}
  end
end
# Render the two column selectors and keep the input handles for the plot call.
plot_inputs = PlotHappy.plot_inputs(happy_df)
# Plot whatever the two inputs hold at the moment this line is evaluated.
# NOTE(review): presumably this must be re-evaluated after changing a selection — confirm.
PlotHappy.plot(happy_df, plot_inputs)