Pembelajaran Mesin dan Analisis Data dengan Python[18]

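Metode k-tetangga terdekat (k-nearest neighbors, KNN) mengklasifikasikan sebuah titik data berdasarkan jaraknya ke titik-titik data lain yang labelnya sudah diketahui. Ukuran jarak yang mendasarinya adalah jarak Euclidean.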

import pandas as pd
import matplotlib.pyplot as plt
# Load the dataset. The path uses forward slashes throughout because "\D" is
# not a valid string escape sequence and triggers a warning on current Python.
original_data = pd.read_csv("C:/Users/Şebnem/Desktop/tutorials/cancer_data.csv")
# Work on a copy so the frame read from disk stays untouched.
data = original_data.copy()

# Split the rows by their diagnosis label: "M" rows and "B" rows.
M = data[data["diagnosis"] == "M"]
B = data[data["diagnosis"] == "B"]

# Scatter radius_mean against texture_mean, one colour per diagnosis group.
plt.scatter(M.radius_mean, M.texture_mean, color="red", label="malignant tumor")
plt.scatter(B.radius_mean, B.texture_mean, color="green", label="benign tumor")
plt.legend()
plt.show()

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Remove the two columns that are not used as features.
data = data.drop(columns=["id", "Unnamed: 32"])

# Encode the label as an integer: "M" -> 1, anything else -> 0.
data["diagnosis"] = data["diagnosis"].eq("M").astype("int64")
y = data["diagnosis"]
X = data.drop(columns=["diagnosis"])

# Hold out 20 % of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply that same scaling to
# the test rows.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Baseline classifier built with the library's default settings.
model = KNeighborsClassifier().fit(X_train, y_train)
prediction = model.predict(X_test)

# Accuracy of the baseline predictions on the held-out rows.
acs = accuracy_score(y_test, prediction)
# acs is 0.9473684210526315 (accuracy_score returns a fraction, i.e. about 94.74 %)

# Candidate neighbour counts, shared by the evaluation loop and the plot.
k_values = range(1, 20)

# Test-set accuracy for each candidate k, in the same order as k_values.
success = []
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
    prediction2nd = knn.predict(X_test)
    success.append(accuracy_score(y_test, prediction2nd))

# Highest accuracy reached by any candidate.
print(max(success))
# recorded output: 0.9649122807017544

# Accuracy as a function of k.
plt.plot(k_values, success)
plt.xlabel("K")
plt.ylabel("Success")
plt.show()

# Rebuild the classifier using the k value chosen as best.
best_k = 9
model = KNeighborsClassifier(n_neighbors=best_k)