1:-[matrix]. 2
3
5
7:-set_random(seed(777)). 8
10rand_vector(0,_,[]):-!.
11rand_vector(D,[A,B],[X|R]):-
12 D1 is D - 1,
13 random(P),
14 X is P * (B - A) + A,
15 rand_vector(D1,[A,B],R),
16 !.
17
19rand_matrix(0,_,_,[]):-!.
20rand_matrix(P,Q,[A,B],[X|R]):-
21 P1 is P - 1,
22 rand_vector(Q,[A,B],X),
23 rand_matrix(P1,Q,[A,B],R),
24 !.
25
27
28:-dynamic weight_matrix/2. 29:-dynamic bias_vector/2. 30:-dynamic activation/2. 31:-dynamic layer_input/2. 32:-dynamic layer_output/2. 33:-dynamic layer_out_diff/2. 34:-dynamic layer_in_diff/2. 35:-dynamic layer_bias_grad/2. 36:-dynamic layer_weight_grad/2. 37
38
39layer_init(Name,InDim,OutDim,Act,[S1,S2]):-
40 retractall(weight_matrix(Name,_)),
41 retractall(bias_vector(Name,_)),
42 retractall(activation(Name,_)),
43 rand_matrix(OutDim,InDim,[S1,S2],W),
44 assert(
45 weight_matrix(
46 Name,
47 W
48 )
49 ),
50 rand_vector(OutDim,[S1,S2],B),
51 assert(
52 bias_vector(
53 Name,
54 B
55 )
56 ),
57 assert(
58 activation(
59 Name,
60 Act
61 )
62 ),
63 print([layer,Name,has,been,initialised]),nl,
64 !.
65
66
68
70
71relu(X,0):-
72 X < 0,
73 !.
74relu(X,30):-
75 X > 30,
76 !.
77relu(X,X):-!.
78
79
80relu_diff(X,0):-
81 X < 0,
82 !.
83relu_diff(X,0):-
84 X >30,
85 !.
86relu_diff(_,1):-!.
87
88
89:-discontiguous vec_act/3. 90:-discontiguous vec_act_diff/3. 91
92vec_act(relu,V,R):-
93 maplist(relu,V,R),
94 !.
95vec_act_diff(relu,V,R):-
96 maplist(relu_diff,V,R),
97 !.
98
99
100
102exp(X,Y):-
103 Y is exp(X),
104 !.
105softmax_sub1(A,X,Y):-
106 Y is X / A,
107 !.
108softmax(V,R):-
109 maplist(exp,V,P),
110 sumlist(P,S),
111 maplist(softmax_sub1(S),P,R),
112 !.
113
114softmax_diff_sub(_,1):-!.
115softmax_diff(V,R):-
116 maplist(softmax_diff_sub,V,R),
117 !.
118
119vec_act(softmax,V,R):-
120 softmax(V,R),
121 !.
122vec_act_diff(softmax,V,R):-
123 softmax_diff(V,R),
124 !.
125
127ce_sub1(A,B,S):-
128 S is A - B,
129 !.
130neg_t_ln_y(Y,T,R):-
131 R is (-1) * T * log(Y),
132 !.
133ce_error(Y,T,E):-
134 maplist(neg_t_ln_y,Y,T,P),
135 sumlist(P,E),
136 !.
137ce_diff(Y,T,D):-
138 maplist(ce_sub1,T,Y,D),
139 !.
140
142
143nnet_forward([],In,In):-!.
144nnet_forward([Name|LayerList],In,Out):-
145 weight_matrix(Name,W),
146 bias_vector(Name,B),
147 activation(Name,Act),
148 retractall(layer_input(Name,_)),
149 assert(layer_input(Name,In)),
150 151 transpose(In,InT),
152 mat_mult_mat(W,InT,X),
154 transpose(X,XT),
155 maplist(vec_add_vec(B),XT,ZT),
157 maplist(vec_act(Act),ZT,Y),
159 retractall(layer_output(Name,_)),
160 assert(layer_output(Name,Y)),
161 162 nnet_forward(LayerList,Y,Out),
163 !.
164
166nnet_comp_error(LayerList,Tgt,Err,Diff):-
167 append(_,[Name],LayerList),
168 layer_output(Name,Y),
169 maplist(ce_error,Y,Tgt,ErrList),
170 sumlist(ErrList,ErrTot),
171 length(ErrList,NumData),
172 Err is ErrTot / NumData,
173 maplist(ce_diff,Y,Tgt,Diff),
174 175 176 !.
177
179nnet_backward([],_,_):-!.
180nnet_backward(LayerList,Diff,LRate):-
181 append(L1,[Name],LayerList),
182 weight_matrix(Name,W),
183 bias_vector(Name,B),
184 activation(Name,Act),
185 layer_output(Name,Y),
186 maplist(vec_act_diff(Act),Y,ActDiff),
187 maplist(vec_mult_vec,Diff,ActDiff,BDiff),
188 transpose(BDiff,BDT),
189 maplist(sumlist,BDT,BGrad),
190 retractall(layer_bias_grad(Name,_)),
191 assert(layer_bias_grad(Name,BGrad)),
192 layer_input(Name,In),
193 mat_mult_mat(BDT,In,WGrad),
194 retractall(layer_weight_grad(Name,_)),
195 assert(layer_weight_grad(Name,WGrad)),
196 mat_mult_mat(BDiff,W,InDiff),
197 198 nnet_backward(L1,InDiff,LRate),
199 200 mat_mult_const(WGrad,LRate,DW),
201 mat_add_mat(W,DW,WNew),
202 retractall(weight_matrix(Name,_)),
203 assert(weight_matrix(Name,WNew)),
204 vec_mult_const(BGrad,LRate,DB),
205 vec_add_vec(B,DB,BNew),
206 retractall(bias_vector(Name,_)),
207 assert(bias_vector(Name,BNew)),
208 !.
209
210nnet_train(_,_,_,0,_):-!.
211nnet_train(Nnet,In,Tgt,Iter,LRate):-
212 I1 is Iter - 1,
213 nnet_forward(Nnet,In,_),
214 nnet_comp_error(Nnet,Tgt,Err,Diff),
215 printerr(Err),
216 nnet_backward(Nnet,Diff,LRate),
217 nnet_train(Nnet,In,Tgt,I1,LRate).
218
219printerr(X):-
220 Y is X * 1000000,
221 round(Y,Z),
222 R is Z / 1000000,
223 print(R),nl,
224 !.
225
226try:-
227 NumHid = 32,
228 layer_init(try1,4,NumHid,relu,[-0.2,0.2]),
229 layer_init(try2,NumHid,NumHid,relu,[-0.2,0.2]),
230 layer_init(try3,NumHid,NumHid,relu,[-0.2,0.2]),
231 layer_init(try4,NumHid,NumHid,relu,[-0.2,0.2]),
232 layer_init(try5,NumHid,NumHid,relu,[-0.2,0.2]),
233 layer_init(try6,NumHid,5,softmax,[-0.2,0.2]),
234 In = [[2,3,4,5],[6,7,8,9]],
235 Tgt = [[0,1,0,0,0],[0,0,0,1,0]],
236 Nnet = [try1,try2,try3,try4,try5,try6],
237 nnet_train(Nnet,In,Tgt,100,0.01)