# Introduction to Innovative Projects
# Iris
# Iris: train decision trees of increasing depth, plot the test accuracy of
# each depth, then report the accuracy of a tree refitted at the best depth.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Load the Iris dataset
dataset = load_iris()
print(dataset.data)
print(dataset.target)
print(dataset.data.shape)

# Create a DataFrame from the feature matrix with column names
X = pd.DataFrame(dataset.data, columns=dataset.feature_names)

# Display the first few rows of the feature matrix
print(X.head())

# Target variable (already printed above as dataset.target)
Y = dataset.target

# Split the data into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.25, random_state=0
)
print(X_train.shape)
print(X_test.shape)

# Train the Decision Tree classifier with varying depths, remembering the
# depth that scores best. The strict ">" keeps the shallowest tree on ties.
depths = range(1, 10)
accuracy = []
best_depth = None
best_score = -1.0
for depth in depths:
    candidate = DecisionTreeClassifier(max_depth=depth, random_state=0)
    candidate.fit(X_train, Y_train)
    score = accuracy_score(Y_test, candidate.predict(X_test))
    accuracy.append(score)
    if score > best_score:
        best_depth = depth
        best_score = score

# Plot the accuracy scores for different max depths
plt.figure(figsize=(12, 6))
plt.plot(depths, accuracy, color='red', linestyle='dashed', marker='o',
         markerfacecolor='blue', markersize=10)
plt.title('Finding best Max_Depth')
plt.xlabel('Max Depth')
plt.ylabel('Accuracy Score')
plt.show()

# Refit at the best depth instead of reusing whichever tree the loop happened
# to train last.
# NOTE(review): the depth is chosen on the same test split it is scored on, so
# the reported accuracy is optimistic - consider a validation split.
model = DecisionTreeClassifier(max_depth=best_depth, random_state=0)
model.fit(X_train, Y_train)
y_pred = model.predict(X_test)

# Show predicted and actual labels side by side (column 0 = predicted,
# column 1 = actual).
print(np.concatenate((y_pred.reshape(-1, 1), Y_test.reshape(-1, 1)), 1))
print("Accuracy of The model:{0}%".format(accuracy_score(Y_test, y_pred) * 100))
# Car Price
# Car Price: fit a random-forest regressor on the non-object columns of
# car-price.csv and print the R2 score on a held-out 20% split.
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import scale

dataset = pd.read_csv('car-price.csv')
print(dataset.shape)
# head must be *called*; printing the bare attribute shows the bound method.
print(dataset.head())

# Drop the car_ID column before modelling.
dataset = dataset.drop(['car_ID'], axis=1)
print(dataset.head(5))

# Features: every column except the target, restricted to non-object dtypes.
Xdata = dataset.drop('price', axis=1)
numericalCols = Xdata.select_dtypes(exclude=['object']).columns
X = Xdata[numericalCols]

# Target
Y = dataset['price']
print(Y.head())

# Standardise the features; scale() returns a bare array, so the column names
# are re-attached.
# NOTE(review): scaling happens before the train/test split, so test rows
# influence the scaling statistics - confirm this is acceptable.
cols = X.columns
X = pd.DataFrame(scale(X), columns=cols)

x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.20, random_state=0
)

# Seed the forest so the score is reproducible, matching the seeded split.
model = RandomForestRegressor(random_state=0)
model.fit(x_train, y_train)
ypred = model.predict(x_test)

r2score = r2_score(y_test, ypred)
print("R2Score", r2score * 100)
# Marks
# Marks: fit a linear regression on marks.csv (all columns but the last are
# features, the last column is the target) and predict one hand-written sample.
import pandas as pd
from sklearn.linear_model import LinearRegression

# The upload widget only exists inside Google Colab. Elsewhere, skip it and
# read marks.csv from the working directory.
try:
    from google.colab import files
except ImportError:
    files = None
if files is not None:
    uploaded = files.upload()

dataset = pd.read_csv('marks.csv')
print(dataset.shape)
print(dataset.head())

# Show which columns contain missing values.
print(dataset.columns[dataset.isna().any()])

# Fill missing values in the hours column with the column mean.
dataset['hours'] = dataset['hours'].fillna(dataset['hours'].mean())

X = dataset.iloc[:, :-1].values  # all rows and all columns except last one
print(X.shape)
Y = dataset.iloc[:, -1].values  # all rows and last column only

model = LinearRegression()
model.fit(X, Y)

# One sample to predict; it must supply one value per feature column.
# NOTE(review): assumes marks.csv has exactly three feature columns - verify
# against the file.
a = [[7.45, 20, 1]]
PredictedmodelResult = model.predict(a)
print(PredictedmodelResult)
# Diabetes
# Diabetes: train a linear-kernel SVM on diabetes.csv (target column
# 'Outcome'), report train/test accuracy, and classify one hand-written sample.
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# The upload widget only exists inside Google Colab. Elsewhere, skip it and
# read diabetes.csv from the working directory.
try:
    from google.colab import files
except ImportError:
    files = None
if files is not None:
    uploaded = files.upload()

dataset = pd.read_csv('diabetes.csv')

# Quick look at the data; printed explicitly because bare expressions show
# nothing outside a notebook.
print(dataset.head())
print(dataset.shape)
print(dataset.describe())
print(dataset['Outcome'].value_counts())

# Features are every column except the target.
X = dataset.drop(columns='Outcome')
Y = dataset['Outcome']
print(X)
print(Y)

# Stratify on Y so both splits keep the same class proportions.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, stratify=Y, random_state=2
)
print(X.shape, X_train.shape, X_test.shape)

model = svm.SVC(kernel='linear')
model.fit(X_train, Y_train)

# accuracy_score takes (y_true, y_pred); the value is the same either way, but
# the documented order is used.
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print('Accuracy score of the training data : ', training_data_accuracy)

X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print('Accuracy of the test data : ', test_data_accuracy)

# One sample to classify, with values in the same order as the columns of X.
input_data = (5, 166, 72, 19, 175, 25.8, 0.587, 51)
input_data_as_numpy_array = np.asarray(input_data)
# The model was fitted on a DataFrame, so the sample is given the same column
# names; a bare array would trigger scikit-learn's feature-name warning.
input_data_reshaped = pd.DataFrame(
    input_data_as_numpy_array.reshape(1, -1), columns=X.columns
)

prediction = model.predict(input_data_reshaped)
print(prediction)

if prediction[0] == 0:
    print('The person is not diabetic.')
else:
    print('The person is diabetic.')