I ran it with 4 data points for training and with 10,000, running each setup 1000 times. Here’s the code:
import numpy as np
from sklearn.linear_model import Perceptron
from itertools import product
from random import randint
import matplotlib.pyplot as plt
from collections import Counter
# points probed with each trained model's decision function
validation = [[0, 0], [1, 1], [0.5, 0.5]]
def generate(m):
    '''Generates a data and label set of size m'''
    d, l = [], []
    for i in range(m):
        x, y = randint(0, 1), randint(0, 1)
        d.append([x, y])
        l.append(x and y)
    return d, l
def large(n, m):
    '''Takes the number of simulations, n, and the number of data
    points per data set, m. Returns two Counter objects:
    coefficients and decision function results.'''
    coef_set = []
    dv_set = []
    for i in range(n):
        d, l = [], []
        # resample until the training set contains both classes,
        # otherwise the perceptron cannot be fit
        while len(set(l)) < 2:
            d, l = generate(m)
        clf = Perceptron(max_iter=40)
        clf.fit(d, l)
        coef_set.append(tuple(clf.coef_.tolist()[0]))
        dv_set.append(tuple(clf.decision_function(validation).tolist()))
        print('\r', str(round((i + 1) / n * 100, 2)) + '%', end='', flush=True)
    return Counter(coef_set), Counter(dv_set)
def graph(x, color='blue', title=''):
    '''Takes a Counter object, a color and a title, and plots
    the distribution.'''
    labels, values = zip(*x.items())
    i = np.arange(len(labels))
    total = sum(values)
    values = tuple(v / total for v in values)  # convert counts to relative frequencies
    width = 1
    plt.figure(figsize=(12, 5))
    plt.bar(i, values, width, color=color, alpha=0.5)
    plt.xticks(i, labels)
    plt.title(title)
    plt.show()
# example usage: 1000 simulations with 4 training points each
c, d = large(1000, 4)
graph(c, 'red',
      'Coefficient Distribution: 1000 iterations with 4 data points')
graph(d, 'blue',
      'Decision Function Distribution: 1000 iterations with 4 data points')
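The 10,000-point experiment mentioned above reuses the same functions; a sketch of that second run (the variable names c2 and d2 are just placeholders I've picked here) would be:

# second experiment: 1000 simulations with 10000 training points each
c2, d2 = large(1000, 10000)
graph(c2, 'red',
      'Coefficient Distribution: 1000 iterations with 10000 data points')
graph(d2, 'blue',
      'Decision Function Distribution: 1000 iterations with 10000 data points')

With 10,000 random points the resampling loop in large practically never fires, since both classes are almost certain to appear; the run simply takes longer per fit than the 4-point case.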