Pembelajaran Mesin dan Analisis Data dengan Python[18]
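Metode k-tetangga terdekat (k-nearest neighbors, KNN) mengklasifikasikan sebuah titik data berdasarkan kelas mayoritas dari k titik data latih yang jaraknya paling dekat dengannya. Ukuran jarak yang mendasarinya (dan yang dipakai secara bawaan) adalah jarak Euclidean: $d(x, y) = \sqrt{\sum_{i} (x_i - y_i)^2}$.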
import pandas as pd
import matplotlib.pyplot as plt

# Load the cancer dataset and work on a copy so the frame read from disk is
# left untouched by the later preprocessing steps.
# The backslash is escaped explicitly: a bare "\D" is an invalid escape
# sequence (a warning on recent Python versions). The resulting path string
# is unchanged.
original_data = pd.read_csv("C:/Users/Şebnem\\Desktop/tutorials/cancer_data.csv")
data = original_data.copy()

# Split the rows by diagnosis label ("M" / "B") so each group can be drawn
# with its own colour.
M = data[data["diagnosis"] == "M"]
B = data[data["diagnosis"] == "B"]

# Scatter plot of mean radius against mean texture for both groups.
plt.scatter(M.radius_mean, M.texture_mean, color="red", label="malignant tumor")
plt.scatter(B.radius_mean, B.texture_mean, color="green", label="benign tumor")
plt.legend()
plt.show()
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Drop the columns that are not used as features. `columns=` already selects
# the axis, so no separate `axis` argument is needed.
data = data.drop(columns=["id", "Unnamed: 32"])

# Encode the target: "M" -> 1, anything else -> 0.
data.diagnosis = [1 if code == "M" else 0 for code in data.diagnosis]
y = data["diagnosis"]
X = data.drop(columns="diagnosis")

# Hold out 20% of the rows for testing. The scaler is fitted on the training
# split only and then applied to the test split, so no test statistics leak
# into the scaling.
sc = StandardScaler()
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Baseline: KNN with the library's default settings.
model = KNeighborsClassifier()
model.fit(X_train, y_train)
prediction = model.predict(X_test)
acs = accuracy_score(y_test, prediction)
# The original write-up reports about 94.7% here; accuracy_score returns the
# value as a fraction (i.e. ~0.947).

# Sweep k = 1..19 and record the test accuracy for each value.
# NOTE(review): k is being chosen on the test split, so the best score is an
# optimistic estimate; cross-validation on the training split would be safer.
success = []
for k in range(1, 20):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    prediction2nd = knn.predict(X_test)
    success.append(accuracy_score(y_test, prediction2nd))

print(max(success))
# The original write-up reports 0.9649122807017544 as the output.

# Accuracy as a function of k.
plt.plot(range(1, 20), success)
plt.xlabel("K")
plt.ylabel("Success")
plt.show()
# Rebuild the classifier with the k value chosen as best (k = 9).
# The assignment is placed on its own line; when it shares a line with the
# comment it is swallowed by the `#` and never executes.
model = KNeighborsClassifier(n_neighbors=9)