""" Bhagirath Mehta and Suvir Mirchandani
Final Project: Implementing an Extension of the Rational Speech Acts Model for the
Figurative Use of Number Words
Ling 130a/230a: Introduction to Semantics and Pragmatics, Winter 2019
"""
import itertools
import math
from IPython.display import display
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
__author__ = "Bhagirath Mehta, Suvir Mirchandani"
class NonliteralNumbersRSA:
    """Implementation of the Rational Speech Acts model extended for nonliteral number words.

    Based on an RSA implementation by Prof. Chris Potts
    (http://web.stanford.edu/class/linguist130a/materials/rsa130a.py)
    and the following work:

    Kao, Justine T., Jean Y. Wu, Leon Bergen & Noah D. Goodman. 2014. Nonliteral
    understanding of number words. Proceedings of the National Academy of Sciences,
    111(33). 12002-12007. doi: 10.1073/pnas.1407479111.

    Parameters
    ----------
    lexicon : list
        possible utterances
    states : list
        possible states; should be equal to lexicon based on Kao et al. (2014)
    affects : list
        possible affect choices; in this case, binary
    s_prior : list
        prior probabilities for the states
    sa_prior : list of lists
        prior probabilities for the affects given a certain state;
        example: sa_prior[0][1] is the probability of having affect=1 for the state at
        index 0
    round_cost : float
        cost of uttering a round number
    sharp_cost : float
        cost of uttering a non-round number
    precision : int
        rounding place value; rounding happens to the nearest 10^precision;
        example: precision=1 considers 12 to be 10 when rounded
    """
    def __init__(self, lexicon, states, affects, s_prior, sa_prior, round_cost, sharp_cost,
                 precision):
        assert lexicon == states, "lexicon (U) must be equal to states (S)"
        self.lexicon = lexicon
        self.states = states
        self.affects = affects
        # A list of tuples for all state/affect combinations
        self.meanings = list(itertools.product(self.states, self.affects))
        self.s_prior = s_prior
        self.sa_prior = sa_prior
        self.round_cost = round_cost
        self.sharp_cost = sharp_cost
        self.precision = precision
    def literal_listener(self):
        """Literal listener predictions for all possible states and affects given an
        utterance.

        Returns
        -------
        3D list. The first dimension corresponds to utterances, the second to states, and
        the third to affects.
        """
        literal = [[[0 for a in self.affects] for s in self.states] for u in self.lexicon]
        for i, u in enumerate(self.lexicon):
            for j, s in enumerate(self.states):
                for k, a in enumerate(self.affects):
                    literal[i][j][k] = self.L_0(s, a, u)
        return literal
    def pragmatic_speaker(self):
        """Pragmatic speaker predictions for all possible utterances given states, affects,
        and goals.

        Returns
        -------
        4D list. The first dimension corresponds to goals, the second to affects, the
        third to states, and the fourth to utterances.
        """
        speaker = [[[[0 for u in self.lexicon] for s in self.states] for a in self.affects]
                   for g in self.goals()]
        for i, g in enumerate(self.goals()):
            for j, a in enumerate(self.affects):
                for k, s in enumerate(self.states):
                    for l, u in enumerate(self.lexicon):
                        speaker[i][j][k][l] = self.S_1(u, s, a, g)
        return speaker
    def pragmatic_listener(self):
        """Pragmatic listener predictions for all possible states and affects given an
        utterance.

        Returns
        -------
        3D list. The first dimension corresponds to utterances, the second to states, and
        the third to affects.
        """
        listener = [[[0 for a in self.affects] for s in self.states] for u in self.lexicon]
        for i, u in enumerate(self.lexicon):
            for j, s in enumerate(self.states):
                for k, a in enumerate(self.affects):
                    listener[i][j][k] = self.L_1(s, a, u)
        return listener
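
    # Indexing convention for the tables above: literal_listener()[i][j][k] and
    # pragmatic_listener()[i][j][k] give the probability of (states[j], affects[k])
    # given the utterance lexicon[i]; pragmatic_speaker()[i][j][k][l] gives the
    # probability of lexicon[l] given goal goals()[i], affect affects[j], and
    # state states[k].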
    def P_A(self, a, s):
        """Returns the prior probability that a certain affect `a` is true given that a
        specific state `s` is true."""
        return self.sa_prior[self.states.index(s)][a]

    def P_S(self, s):
        """Returns the prior probability that a certain state `s` is true."""
        return self.s_prior[self.states.index(s)]
    def Round(self, x):
        """Returns a rounded version of the utterance, based on `self.precision`."""
        return round(x, -self.precision)
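
    # Illustration of Round(): with precision=1, rounding is to the nearest 10**1,
    # so Round(32) returns 30 and Round(1000) returns 1000.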
    def C(self, u):
        """Returns the cost of an utterance based on whether it is round or sharp."""
        if u == self.Round(u):
            return self.round_cost
        return self.sharp_cost
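
    # Worked example: with round_cost=1 and sharp_cost=5 (the values used under
    # __main__ below), C(30) = 1 since 30 is round, while C(32) = 5 since
    # Round(32) = 30 != 32.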
    def L_0(self, s, a, u):
        """Returns the literal listener's prediction probability of a state `s` and affect
        `a` given an utterance `u`."""
        if s == u:
            return self.P_A(a, s)
        return 0
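
    # L_0 encodes the literal semantics from Kao et al. (2014): the literal listener
    # assigns probability P(a | s) to (s, a) when s equals the uttered number u,
    # and 0 otherwise.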
    def L_0_projected(self, x, u, g):
        """Version of `L_0` projected onto a specific goal value `x` (unused in our
        implementation)."""
        prob = 0
        for s_p, a_p in self.meanings:
            if x == g(s_p, a_p):
                prob += self.L_0(s_p, a_p, u)
        return prob
    def S_1(self, u, s, a, g):
        """Returns the speaker's prediction probability of an utterance `u` given a state `s`,
        affect `a`, and goal `g`. Uses helper function `S_1_joint` for the joint probability."""
        numerator = self.S_1_joint(u, s, a, g)
        # Normalization
        denominator = sum([self.S_1_joint(u_p, s, a, g) for u_p in self.lexicon])
        return numerator / denominator

    def S_1_joint(self, u, s, a, g):
        """Returns the speaker's joint probability of an utterance `u`, state `s`, affect `a`,
        and goal `g`."""
        total = 0
        for s_p, a_p in self.meanings:
            if g(s, a) == g(s_p, a_p):
                total += self.L_0(s_p, a_p, u) * math.exp(-1 * self.C(u))
        return total
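
    # Together, S_1 and S_1_joint implement the pragmatic speaker of Kao et al. (2014):
    # S_1(u | s, a, g) is proportional to the sum of L_0(s', a' | u) over all meanings
    # (s', a') satisfying the goal, i.e. g(s', a') == g(s, a), with each term weighted
    # by exp(-C(u)); the result is normalized over all utterances in the lexicon.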
    def L_1(self, s, a, u):
        """Returns the pragmatic listener's prediction probability of a state `s` and affect
        `a` given an utterance `u`. Uses helper function `L_1_joint` for the joint probability."""
        numerator = self.L_1_joint(s, a, u)
        # Normalization
        denominator = sum([self.L_1_joint(s_p, a_p, u) for s_p in self.states for a_p in
                           self.affects])
        return numerator / denominator

    def L_1_joint(self, s, a, u):
        """Returns the pragmatic listener's joint probability of a state `s`, affect `a`, and
        utterance `u`."""
        total = 0
        for g in self.goals():
            total += self.P_S(s) * self.P_A(a, s) * self.P_G(g) * self.S_1(u, s, a, g)
        return total
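
    # Together, L_1 and L_1_joint implement the pragmatic listener of Kao et al. (2014):
    # L_1(s, a | u) is proportional to P(s) * P(a | s) * the sum over goals g of
    # P(g) * S_1(u | s, a, g), normalized over all state/affect pairs.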
    def P_G(self, g):
        """Returns the probability of a particular conversational goal. Based on Kao et al.
        (2014), we implement this as a uniform prior."""
        return 1. / len(self.goals())
    def goals(self):
        """Returns a list corresponding to the different conversational goals described by Kao
        et al. (2014). These are functions that return (state), (affect), or (state, affect)
        for either exact or approximated states, for a total of six goal functions."""
        return [lambda s, a, r=r, f=f: r(f(s), a) for r in self.possible_r() for f in
                self.possible_f()]

    def possible_f(self):
        """Returns a list of functions `f` of state that return the state or the rounded state."""
        return [lambda s: s,
                lambda s: self.Round(s)]

    def possible_r(self):
        """Returns a list of functions `r` that return (state), (affect), or (state, affect)."""
        return [lambda s, a: (s,),
                lambda s, a: (a,),
                lambda s, a: (s, a)]
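
    # The six goals returned by goals() are the combinations of f (report the exact
    # state or its rounded value) with r (communicate only the state, only the affect,
    # or both), matching the goal set described by Kao et al. (2014).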
    def display_listener(self, listener, title, visual):
        """Displays the probability distribution for a listener (either `self.literal_listener()`
        or `self.pragmatic_listener()`). If visual=True, displays graphically with a
        heatmap-like representation for the probabilities."""
        print("=" * 70 + "\n" + title + ": ")
        if visual:
            sns.set()
            f, axes = plt.subplots(1, len(self.lexicon), figsize=(15, 5))
            f.suptitle(title, fontsize=16)
            f.subplots_adjust(wspace=0.5, top=0.85, bottom=0.15)
            cbar_ax = f.add_axes([.93, .15, .03, .7])
        for u, given_u in enumerate(listener):
            lex = pd.DataFrame(index=self.states, columns=self.affects, data=given_u)
            d = lex.copy()
            if visual:
                fig = sns.heatmap(d, annot=True, fmt='.2g', ax=axes[u], vmin=0, vmax=1,
                                  linewidths=2, cmap="Blues", cbar=(u == len(listener) - 1),
                                  cbar_ax=cbar_ax if (u == len(listener) - 1) else None)
                fig.set_xlabel("Affects")
                fig.set_ylabel("States")
                fig.set_title("Utterance: " + str(self.lexicon[u]))
                fig.tick_params(axis='x', bottom=False, top=False)
                fig.tick_params(axis='y', left=False, right=False)
            else:
                d.loc['utterance'] = [self.lexicon[u]] + [" "]
                display(d)
                print()
        if visual:
            plt.show()
    def display_speaker(self, speaker, title, visual):
        """Displays the probability distribution for a speaker (`self.pragmatic_speaker()`).
        If visual=True, displays graphically with a heatmap-like representation for the
        probabilities."""
        print("=" * 70 + "\n" + title + ": ")
        goals = ["r_{}(f_{}(s),a)".format(r, f) for r in ['s', 'a', 'sa'] for f in ['e', 'a']]
        if visual:
            sns.set()
            f, axes = plt.subplots(len(goals), len(self.affects), figsize=(6, 12))
            f.suptitle(title, fontsize=16)
            f.subplots_adjust(wspace=0.3, hspace=0.8, top=0.92, bottom=0.08)
            cbar_ax = f.add_axes([.93, .15, .02, .7])
        for g, given_g in enumerate(speaker):
            for a, given_ag in enumerate(given_g):
                lex = pd.DataFrame(index=self.states, columns=self.lexicon, data=given_ag)
                d = lex.copy()
                if visual:
                    fig = sns.heatmap(d, annot=True, fmt='.2g', ax=axes[g][a], vmin=0, vmax=1,
                                      linewidths=2, cmap="Reds",
                                      cbar=(g == len(goals) - 1 and a == len(self.affects) - 1),
                                      cbar_ax=cbar_ax if (g == len(goals) - 1 and
                                                          a == len(self.affects) - 1) else None)
                    fig.set_xlabel("Utterances")
                    fig.set_ylabel("States")
                    fig.set_title("Goal: " + str(goals[g]) + "; Affect: " + str(self.affects[a]))
                    fig.tick_params(axis='x', bottom=False, top=False)
                    fig.tick_params(axis='y', left=False, right=False)
                else:
                    d.loc['goal'] = [goals[g]] + [" "] * 2
                    d.loc['affect'] = [self.affects[a]] + [" "] * 2
                    display(d)
                    print()
        if visual:
            plt.show()

if __name__ == '__main__':
    # Core lexicon:
    S = U = [30, 32, 1000]
    A = [0, 1]
    s_prior = [0.495, 0.495, 0.01]
    sa_prior = [[0.9, 0.1],
                [0.9, 0.1],
                [0.01, 0.99]]  # in the form sa_prior[s][a] = P(a|s)

    # The parameters of the model can be changed here.
    rsa = NonliteralNumbersRSA(lexicon=U, states=S, affects=A, s_prior=s_prior,
                               sa_prior=sa_prior, round_cost=1, sharp_cost=5, precision=1)

    rsa.display_listener(rsa.literal_listener(), title="Literal Listener", visual=True)
    rsa.display_speaker(rsa.pragmatic_speaker(), title="Pragmatic Speaker", visual=True)
    rsa.display_listener(rsa.pragmatic_listener(), title="Pragmatic Listener", visual=True)
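
    # Text-only alternative (a sketch, not part of the original script): the same tables
    # can be printed as pandas DataFrames instead of seaborn heatmaps by passing
    # visual=False, e.g.
    #   rsa.display_listener(rsa.pragmatic_listener(), title="Pragmatic Listener", visual=False)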