|
1 | 1 | (load-string (slurp "https://raw.githubusercontent.com/scicloj/clojure-data-tutorials/main/header.edn")) |
2 | | -;; --------------- |
3 | 2 |
|
4 | | -;; This tutorial demonstrates using |
5 | | -;; the probabilistic programming library PyMC |
6 | | -;; from Clojure. |
7 | | - |
8 | | -;; We follow the linear regression example from |
9 | | -;; the [Introductory Overview of PyMC](https://www.pymc.io/projects/docs/en/stable/learn/core_notebooks/pymc_overview.html). |
10 | | - |
11 | | -;; ## Setup |
12 | | - |
13 | | -;; Relevant Clojure namespaces: |
14 | | - |
15 | | -(ns index |
16 | | - (:require [libpython-clj2.require :refer [require-python]] |
17 | | - [libpython-clj2.python :refer [py. py.. py.-] :as py] |
18 | | - [fastmath.random :as random] |
19 | | - [tablecloth.api :as tc] |
20 | | - [tablecloth.column.api :as tcc] |
21 | | - [tech.v3.datatype :as dtype] |
22 | | - [scicloj.hanamicloth.v1.plotlycloth :as ploclo] |
23 | | - [scicloj.kind-pyplot.v1.api :as pyplot] |
24 | | - [scicloj.kindly.v4.kind :as kind])) |
25 | | - |
26 | | -;; Relevant Python modules: |
27 | | - |
28 | | -(require-python '[builtins :as python] |
29 | | - 'operator |
30 | | - '[arviz :as az] |
31 | | - '[arviz.style :as az.style] |
32 | | - '[pandas :as pd] |
33 | | - '[matplotlib.pyplot :as plt] |
34 | | - '[numpy :as np] |
35 | | - '[numpy.random :as np.random] |
36 | | - '[pymc :as pm]) |
37 | | - |
38 | | -;; Some convenience functions to access Python idioms: |
39 | | - |
(defn brackets
  "Pythonic square-bracket access: returns `obj[entry]` by invoking
  the Python `__getitem__` protocol on `obj`."
  [obj entry]
  (py. obj __getitem__ entry))
42 | | - |
(def colon
  "The all-elements Python slice — what a bare `:` means in Python
  indexing, i.e. `slice(None, None)`."
  (python/slice nil nil))
45 | | - |
46 | | -;; Theme for ArViZ visualizations: |
47 | | - |
48 | | -(arviz.style/use "arviz-darkgrid") |
49 | | - |
50 | | -;; ## Synthetic data |
51 | | - |
52 | | -(def random-seed 8927) |
53 | | - |
54 | | -(def dataset-size 101) |
55 | | - |
(def true-parameter-values
  "Ground-truth parameters used to synthesize the data:
  intercept `:alpha`, noise scale `:sigma`, and the two regression
  coefficients `:beta` (for :x1 and :x2 respectively)."
  {:alpha 1
   :sigma 1
   :beta  [1 2.5]})
60 | | - |
61 | | -;; We will generate a dataset by the following recipe: |
62 | | - |
(defn gen-dataset
  "Generate a synthetic linear-regression dataset of `size` rows.

  Columns:
  - :x1, :x2 — predictors drawn from a seeded RNG (:x2 scaled by 0.2)
  - :y       — alpha + beta0*x1 + beta1*x2 + sigma*noise

  All randomness now comes from the :isaac RNG seeded with
  `random-seed`, so the dataset is fully reproducible.
  (The previous version drew the noise term from Clojure's unseeded
  `rand` inside a `dtype/make-reader`, which both broke
  reproducibility and re-evaluated `(rand)` on every element read.)"
  [{:keys [size random-seed alpha sigma beta]}]
  (let [rng   (random/rng :isaac random-seed)
        ;; each call realizes `size` fresh draws from the stateful rng
        draw  (fn [] (vec (take size (random/->seq rng))))
        x1    (draw)
        x2    (tcc/* (draw) 0.2)
        noise (draw)]
    (-> (tc/dataset {:x1 x1 :x2 x2})
        (tc/add-column :y
                       (fn [ds]
                         (tcc/+ alpha
                                (tcc/* (beta 0) (:x1 ds))
                                (tcc/* (beta 1) (:x2 ds))
                                (tcc/* sigma noise)))))))
| 3 | +^:kindly/hide-code |
| 4 | +(ns index) |
77 | 5 |
|
(def dataset
  "The synthetic dataset used throughout this tutorial, generated
  from the ground-truth parameters with a fixed seed and size."
  (gen-dataset (merge {:random-seed random-seed
                       :size        dataset-size}
                      true-parameter-values)))
| 6 | +;; # Preface |
82 | 7 |
|
83 | | -(tc/head dataset) |
84 | | - |
85 | | -;; Let us visualize our dataset: |
86 | | - |
;; One scatter layer per predictor column. The plotted column must be
;; the one we map over — the previous version hard-coded :x1 for both
;; plots, so the :x2 panel silently showed :x1 again.
(->> [:x1 :x2]
     (mapv (fn [x]
             (-> dataset
                 (ploclo/layer-point
                  {:=x x}))))
     kind/fragment)
93 | | - |
94 | | -;; ## Using PyMC |
95 | | - |
96 | | -pm/__version__ |
97 | | - |
98 | | -;; Let us define a Bayesian model for our data: |
99 | | - |
100 | | -(def basic-model (pm/Model)) |
101 | | - |
;; Model specification, following the PyMC introductory overview:
;;   alpha ~ Normal(0, 10)
;;   beta  ~ Normal(0, 10), shape 2
;;   sigma ~ HalfNormal(1)
;;   mu    = alpha + beta[0]*x1 + beta[1]*x2
;;   y_obs ~ Normal(mu, sigma), observed on y
;; (Fixed: the x2 term previously multiplied by beta[0] instead of
;; beta[1], making the second coefficient unidentifiable.)
(py/with [_ basic-model]
         (let [{:keys [x1 x2 y]} (-> dataset
                                     (update-vals np/array))
               alpha (pm/Normal "alpha"
                                :mu 0
                                :sigma 10)
               beta (pm/Normal "beta"
                               :mu 0
                               :sigma 10
                               :shape 2)
               sigma (pm/HalfNormal "sigma"
                                    :sigma 1)
               mu (operator/add alpha
                                (operator/mul (brackets beta 0)
                                              x1)
                                (operator/mul (brackets beta 1)
                                              x2))
               y_obs (pm/Normal "y_obs"
                                :mu mu
                                :sigma sigma
                                :observed y)]))
123 | | - |
124 | | -;; Now we can sample from the posterior: |
125 | | - |
(def idata
  "Posterior samples drawn from `basic-model` with PyMC's default
  NUTS sampler."
  (py/with [_ basic-model]
           (pm/sample)))
129 | | - |
130 | | -;; Here is the resulting structure: |
131 | | - |
132 | | -(-> idata |
133 | | - (py.- posterior) |
134 | | - (py.- alpha) |
135 | | - (py. sel :draw (python/slice 0 4))) |
136 | | - |
137 | | -;; Alternatively, we could also use the Slice sampling algorithm
138 | | -;; instead of the default NUTS. |
139 | | - |
(def slice-idata
  "Posterior samples drawn from `basic-model` using the Slice
  sampling step method rather than the default NUTS."
  (py/with [_ basic-model]
           (pm/sample 5000 :step (pm/Slice))))
144 | | - |
145 | | -slice-idata |
146 | | - |
147 | | -;; ## Posterior analysis |
148 | | - |
149 | | -;; Let us plot our sampling using ArViZ: |
| 8 | +;; These tutorials demonstrate using |
| 9 | +;; the probabilistic programming library [PyMC](https://www.pymc.io/) |
| 10 | +;; from Clojure. |
150 | 11 |
|
;; Trace plot of the NUTS posterior, with chains combined:
(pyplot/pyplot
 (fn []
   (az/plot_trace idata :combined true)))
| 12 | +;; * [Intro](./intro.html) |
0 commit comments