Skip to content

Latest commit

 

History

History
154 lines (129 loc) · 4.48 KB

happydata.livemd

File metadata and controls

154 lines (129 loc) · 4.48 KB

Kaggle happiness dataset

Mix.install([
  {:explorer, "~> 0.6.0"},
  {:kino, "~> 0.9.0"},
  {:vega_lite, "~> 0.1.6"},
  {:kino_vega_lite, "~> 0.1.9"},
  {:kino_explorer, "~> 0.1.4"}
])

Section

alias VegaLite, as: Vl
alias Explorer.DataFrame, as: DF
alias Explorer.Series, as: Series

# {:kino_explorer, "~> 0.1.8"},

Load the dataframe (much like in Pandas, except that `load_csv!` takes the CSV contents as a string rather than a file path)

# Read the CSV file into a string, then parse that string into a dataframe.
happy_df =
  "happydata.csv"
  |> File.read!()
  |> DF.load_csv!()

Show basic dataframe information

# Basic information about the columns of the dataframe.
DF.describe(happy_df)
# Point chart of "happy" (y) against "housecost" (x); only those two columns
# are handed to the chart, and both are encoded as quantitative fields.
Vl.new(title: "happy")
|> Vl.data_from_values(happy_df, only: ["housecost", "happy"])
|> Vl.mark(:point)
|> Vl.encode_field(:x, "housecost", type: :quantitative)
|> Vl.encode_field(:y, "happy", type: :quantitative)
# Make the macros of Explorer.DataFrame available to the code that follows.
require Explorer.DataFrame
# Bare expression: evaluates to the dataframe itself
# (presumably so the notebook renders it as the cell result).
happy_df

`defp` defines a private function (callable only from inside its own module); the private helpers below can be skipped on a first reading

defmodule Plot do
  @moduledoc """
  Builds VegaLite charts for a pair of dataframe columns.

  Relies on the notebook-level aliases `Vl` (VegaLite) and `Series`
  (Explorer.Series).
  """

  @doc """
  Builds a chart of `col2` (y axis) against `col1` (x axis).

  ## Parameters

    * `df` - dataframe holding both columns
    * `col1` - name of the column drawn on the x axis
    * `col2` - name of the column drawn on the y axis (always quantitative)
    * `plot_type` - `:scatterplot` (quantitative x axis, circle marks) or
      `:boxplot` (ordinal x axis, boxplot marks)

  Returns the VegaLite specification. Raises `CaseClauseError` for any other
  `plot_type`.
  """
  def plot(df, col1, col2, plot_type) do
    case plot_type do
      :scatterplot -> scatterplot(df, col1, col2)
      :boxplot -> boxplot(df, col1, col2)
    end
  end

  # Returns `[min, max]` of the series. A scale domain is ordered from the
  # lowest to the highest value; the reverse order would flip the axis.
  defp get_range(srs) do
    [Series.min(srs), Series.max(srs)]
  end

  # Scatterplot: both axes are quantitative and clamped to the data range.
  defp scatterplot(df, col1, col2) do
    df
    |> base_chart("Scatterplot of Generated Data", :circle)
    |> encode_quantitative(:x, df, col1)
    |> encode_quantitative(:y, df, col2)
  end

  # Boxplot: one box per distinct value of `col1`.
  defp boxplot(df, col1, col2) do
    df
    |> base_chart("Boxplot of Generated Data", :boxplot)
    # No explicit domain here: the domain of an ordinal scale is the list of
    # categories, so a two-element [min, max] range would keep only the two
    # extreme categories. Leaving it out lets Vega-Lite derive it from the data.
    |> Vl.encode_field(:x, col1, type: :ordinal, axis: [grid: false])
    |> encode_quantitative(:y, df, col2)
  end

  # Shared chart skeleton: title, fixed 630x630 size, data and mark type.
  defp base_chart(df, title_text, mark) do
    Vl.new(
      title: [
        text: title_text,
        offset: 20
      ],
      width: 630,
      height: 630
    )
    |> Vl.data_from_values(df)
    |> Vl.mark(mark)
  end

  # Encodes `col` on `channel` as a quantitative field whose scale spans
  # exactly the column's value range, with grid lines switched off.
  defp encode_quantitative(chart, channel, df, col) do
    Vl.encode_field(chart, channel, col,
      type: :quantitative,
      scale: [domain: get_range(df[col])],
      axis: [grid: false]
    )
  end
end
defmodule PlotHappy do
  @moduledoc """
  Interactive column selection for plotting a dataframe with `Plot`.

  Relies on the notebook-level alias `Series` (Explorer.Series).
  """

  # Builds `{value, label}` options for a select input; each column name is
  # used both as the value and as the label.
  defp columns_as_tuples(df) do
    for name <- Explorer.DataFrame.names(df), do: {name, name}
  end

  @doc """
  Renders two select inputs, labelled "col1" and "col2", listing the columns
  of `df`.

  Returns the two inputs as `[col1, col2]`, ready to be passed to `plot/2`.
  """
  def plot_inputs(df) do
    column_tuples = columns_as_tuples(df)
    col1 = Kino.Input.select("col1", column_tuples) |> Kino.render()
    col2 = Kino.Input.select("col2", column_tuples) |> Kino.render()
    [col1, col2]
  end

  @doc """
  Plots the columns currently selected in the inputs returned by
  `plot_inputs/1`.

  The column selected in `col1` goes on the x axis and the one selected in
  `col2` on the y axis. An `:integer` x column is drawn as a boxplot; any
  other dtype is drawn as a scatterplot.
  """
  def plot(df, [col1, col2]) do
    # Read each input exactly once so the plot type and the plotted column
    # are always derived from the same selection.
    x_col = Kino.Input.read(col1)
    y_col = Kino.Input.read(col2)

    plot_type =
      case Series.dtype(df[x_col]) do
        :integer -> :boxplot
        _ -> :scatterplot
      end

    Plot.plot(df, x_col, y_col, plot_type)
  end
end
# Render the two column selectors and keep the inputs for later reads.
plot_inputs = PlotHappy.plot_inputs(happy_df)
# Build the chart for the columns currently selected in those inputs.
PlotHappy.plot(happy_df, plot_inputs)