| Input: heads, d_model, mask, q, k, v | | Output: outputs, attns | (1) | import tensorflow as tf, keras.layers.Dense, keras.callbacks.K | (2) | dk = dv = d_model//heads | (3) | qs_layer = Dense(heads * dk), ks_layer = Dense(heads * dk), vs_layer = Dense(heads * dv) | (4) | qs = qs_layer(q), ks = ks_layer(k), vs = vs_layer(v) | (5) | qs ← reshape(qs), ks ← reshape(ks), vs ← reshape(vs) | (6) | temper = tf.sqrt(tf.shape(ks)[−1]) | (7) | for i = 1 to heads do | (8) | for j = 1 to heads do | (9) | a_ij = K.batch_dot(qs[:,i,:,:], ks[:,j,:,:], axes = [2]) | (10) | if mask is not None then | (11) | mmask = (−1e9) * (1 − mask) | (12) | a_ij ← K.Add([a_ij, mmask]) | (13) | end if | (14) | a_ij ← K.expand_dims(a_ij, axis = 1) | (15) | attn.append(a_ij) | (16) | end for | (17) | end for | (18) | j = 0 | (19) | while True do | (20) | if j != 0 and j % heads == 0 then | (21) | break | (22) | end if | (23) | for i = 1 to heads do | (24) | a = attn[i * heads + j] | (25) | temp_a.append(a) | (26) | end for | (27) | sm = K.Activation(“softmax”, K.Add(temp_a)) | (28) | output = K.batch_dot(sm[:,0,:,:], vs[:,j,:,:], axes = [1,2]) | (29) | output ← K.expand_dims(output, axis = 0) | (30) | sm ← K.permute_dimensions(sm, (1, 0, 2, 3)) | (31) | outputs.append(output) | (32) | attns.append(sm) | (33) | j ← j + 1 | (34) | end while | (35) | outputs = K.concatenate(outputs, axis = 0) | (36) | attns = K.concatenate(attns, axis = 0) | (37) | outputs ← reshape(outputs), attns ← reshape(attns) |
|