Practice

program 5 - Random Forest Classification – Weather and Iris Dataset

 1
 2import pandas as pd import numpy as np
 3import matplotlib.pyplot as plt import seaborn as sns
 4from sklearn.model_selection import train_test_split from sklearn.ensemble import RandomForestClassifier from sklearn.preprocessing import LabelEncoder
 5from sklearn.tree import plot_tree
 6from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
 7# Dataset
 8data = pd.DataFrame({
 9'Outlook': ['Sunny', 'Sunny', 'Overcast', 'Rain', 'Rain', 'Rain', 'Overcast',
10'Sunny', 'Sunny', 'Rain', 'Sunny', 'Overcast', 'Overcast', 'Rain'],
11'Temp': ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool',
12'Mild', 'Cool', 'Mild', 'Mild', 'Mild', 'Hot', 'Mild'],
13'Humidity': ['High', 'High', 'High', 'High', 'Normal', 'Normal', 'Normal',
14'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'High'],
15'Wind': ['Weak', 'Strong', 'Weak', 'Weak', 'Weak', 'Strong', 'Strong',
16'Weak', 'Weak', 'Weak', 'Strong', 'Strong', 'Weak', 'Strong'],
17'PlayTennis': ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes',
18'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No']
19})
20# Encoding
21le = LabelEncoder()
22for column in data.columns:
23data[column] = le.fit_transform(data[column])
24
25# Splitting data
26X = data.drop('PlayTennis', axis=1) y = data['PlayTennis']
27X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=24)
28
29# Random Forest model
30rf = RandomForestClassifier(n_estimators=10, random_state=24) rf.fit(X_train, y_train)
31
32# Prediction
33y_pred = rf.predict(X_test)
34
35# Evaluation
36print("Accuracy:", accuracy_score(y_test, y_pred)) 
37plt.show()
38# Visualizing one tree plt.figure(figsize=(6,8))
39plot_tree(rf.estimators_[0], filled=True, feature_names=X.columns, class_names=["No", "Yes"])
40plt.title("Decision Tree from Random Forest") plt.show()
# Random Forest Classification – Iris Dataset
# Uses pd, plt, sns, train_test_split, RandomForestClassifier, plot_tree and
# the metric helpers imported at the top of this program.
from sklearn.datasets import load_iris

iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

rf_iris = RandomForestClassifier(n_estimators=10, random_state=42)
rf_iris.fit(X_train, y_train)
y_pred = rf_iris.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Feature Importance
plt.figure(figsize=(6, 5))
sns.barplot(x=rf_iris.feature_importances_, y=iris.feature_names)
plt.title("Feature Importances - Iris Dataset")
plt.show()

# Visualizing one tree
plt.figure(figsize=(10, 6))
# Plain lists are passed for the names: some scikit-learn releases reject a
# NumPy array (iris.target_names) for class_names.
plot_tree(
    rf_iris.estimators_[0],
    filled=True,
    feature_names=list(iris.feature_names),
    class_names=list(iris.target_names),
)
plt.title("Decision Tree from Random Forest - Iris")
plt.show()

program 6 - KNN

 1import numpy as np import pandas as pd
 2import matplotlib.pyplot as plt
 3from sklearn.preprocessing import LabelEncoder from sklearn.neighbors import KNeighborsClassifier
 4
 5# Step 1: Create the dataset data = 
 6{
 7"Name": ["Ajay", "Mark", "Sara", "Zaira", "Sachin", "Rahul", "Pooja", "Smith", "Laxmi", "Michael"],
 8"Age": [32, 40, 16, 34, 34, 40, 20, 15, 55, 15],
 9"Gender": ["M", "M", "F", "F", "M", "M", "F", "M", "F", "M"],
10"Sport": ["Football", "Neither", "Cricket", "Cricket", "Neither", "Cricket", "Neither", "Cricket", "Football", "Football"]
11}
12df = pd.DataFrame(data)
13# Step 2: Encode categorical data label_encoder = LabelEncoder()
14df["Gender"] = label_encoder.fit_transform(df["Gender"]) # M -> 0, F -> 1 df["Sport"] = label_encoder.fit_transform(df["Sport"])
15
16# Step 3: Prepare features and target X = df[["Age", "Gender"]]
17y = df["Sport"]
18
19# Step 4: Define test data (Angelina)
20new_data = np.array([[5, 1]]) # Age = 5, Gender = F (1)
21
22# Step 5: Train and predict with KNN
23knn = KNeighborsClassifier(n_neighbors=3, metric='euclidean') knn.fit(X, y)
24predicted_sport = knn.predict(new_data)
25
26# Decode sport back to original label
27predicted_sport_label = label_encoder.inverse_transform(predicted_sport)[0] print(f"Predicted Sport for Angelina: {predicted_sport_label}")
28
29# Step 6: Visualization plt.figure(figsize=(8, 6)) colors = ['red', 'blue', 'green'] labels = df["Sport"].unique()
30for sport, color in zip(labels, colors):
31plt.scatter(df[df["Sport"] == sport]["Age"], df[df["Sport"] == sport]["Gender"], label=sport, color=color)
32plt.scatter(new_data[0][0], new_data[0][1], color='black', marker='x', s=100, label='Angelina')
33plt.xlabel("Age") plt.ylabel("Gender")
34plt.title("KNN Classification of Sport based on Age and Gender") plt.legend()
35plt.grid(True) plt.show()

PROGRAM 7 - LINEAR REGRESSION USING MSE

 1
 2import numpy as np
 3import matplotlib.pyplot as plt
 4from sklearn.model_selection import train_test_split from sklearn.linear_model import LinearRegression from sklearn.metrics import mean_squared_error
 5# Generate synthetic data np.random.seed(42)
 6X = 2 * np.random.rand(100, 1)
 7y = 4 + 3 * X + np.random.randn(100, 1)
 8
 9# Split the dataset into training and testing sets
10X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
11
12# Create and train the model model = LinearRegression() model.fit(X_train, y_train)
13
14# Make predictions
15y_pred = model.predict(X_test)
16# Evaluate the model
17mse = mean_squared_error(y_test, y_pred) print(f"Mean Squared Error: {mse}") print(f"Intercept: {model.intercept_[0]}") print(f"Coefficient: {model.coef_[0][0]}")
18
19# Plot results
20plt.scatter(X_test, y_test, color='blue', label='Actual Data') plt.plot(X_test, y_pred, color='red', linewidth=2, label='Regression Line') plt.xlabel("X")
21plt.ylabel("y") plt.legend()
22plt.title("Linear Regression Model") plt.show()

program 8 - Naive Bayes classifier

 1
 2from sklearn.datasets import fetch_20newsgroups 
 3from sklearn.model_selection import train_test_split 
 4from sklearn.feature_extraction.text import CountVectorizer 
 5from sklearn.naive_bayes import MultinomialNB 
 6from sklearn.metrics import accuracy_score, classification_report 
 7 
 8# Load dataset 
 9categories = ['sci.space', 'rec.sport.baseball'] 
10data = fetch_20newsgroups(subset='all', categories=categories, shuffle=True, 
11random_state=42) 
12 
13# Split data 
14X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.3, 
15random_state=42) 
16 
17# Convert text to numerical features 
18vectorizer = CountVectorizer() 
19X_train_counts = vectorizer.fit_transform(X_train) 
20X_test_counts = vectorizer.transform(X_test) 
21 
22# Train Naïve Bayes model 
23model = MultinomialNB() 
24model.fit(X_train_counts, y_train) 
25 
26# Make predictions 
27y_pred = model.predict(X_test_counts) 
28 
29# Evaluate 
30accuracy = accuracy_score(y_test, y_pred) 
31print(f"Accuracy: {accuracy:.2f}") 
32print("\nClassification Report:") 
33print(classification_report(y_test, y_pred, target_names=categories))

program 9 - genetic algorithm in python.

 1import random 
 2 
 3# Objective function: f(x) = x^2 
 4def fitness(x): 
 5return x**2 
 6# Create initial population 
 7def generate_population(size, x_min, x_max): 
 8return [random.randint(x_min, x_max) for _ in range(size)] 
 9# Select parents: roulette wheel selection 
10def select_parents(population): 
11fitnesses = [fitness(ind) for ind in population] 
12total_fit = sum(fitnesses) 
13probs = [f / total_fit for f in fitnesses] 
14parents = random.choices(population, weights=probs, k=2) 
15return parents 
16 
# Crossover: single-point
def crossover(p1, p2, n_bits=5):
    """Single-point crossover on the binary representations of ``p1`` and ``p2``.

    A cut point is chosen uniformly in ``[1, n_bits - 1]``. ``child1`` takes
    the bits below the cut from ``p1`` and the remaining bits from ``p2``;
    ``child2`` is the mirror image.

    Args:
        p1, p2: non-negative integer parents.
        n_bits: width of the chromosome in bits (default 5, i.e. values
            0..31). Must be at least 2 so that a cut point exists.

    Returns:
        Tuple ``(child1, child2)``.
    """
    point = random.randint(1, n_bits - 1)
    mask = (1 << point) - 1  # ones in the `point` lowest bit positions
    child1 = (p1 & mask) | (p2 & ~mask)
    child2 = (p2 & mask) | (p1 & ~mask)
    return child1, child2

# Mutation: flip 1 bit
def mutate(individual, mutation_rate=0.1, n_bits=5):
    """Return ``individual`` with, at probability ``mutation_rate``, one bit flipped.

    Args:
        individual: integer chromosome.
        mutation_rate: probability in ``[0, 1]`` that a mutation happens.
        n_bits: width of the chromosome in bits (default 5); the flipped bit
            is chosen uniformly among bit positions ``0 .. n_bits - 1``.

    Returns:
        The (possibly mutated) integer.
    """
    if random.random() < mutation_rate:
        bit = 1 << random.randint(0, n_bits - 1)
        individual ^= bit
    return individual

# GA driver
def genetic_algorithm(generations=20, pop_size=6, x_min=0, x_max=31):
    """Run a simple generational genetic algorithm that maximises ``fitness``.

    Each generation is replaced wholesale by children produced through
    roulette-wheel selection, single-point crossover and bit-flip mutation.
    Progress is printed once per generation.

    Args:
        generations: number of generations to evolve.
        pop_size: population size. Children are produced in pairs
            (``pop_size // 2`` pairs), so use an even value of at least 2.
        x_min, x_max: inclusive bounds for the initial random population.
            Crossover and mutation operate on the low 5 bits by default, so
            later generations may contain any value those bits can express.

    Returns:
        The fittest individual of the final generation (there is no elitism,
        so this is not necessarily the best individual ever seen).
    """
    population = generate_population(pop_size, x_min, x_max)
    print(f"Initial Population: {population}")

    # Seed `best` from the initial population so that generations=0 still
    # returns a value instead of hitting an unassigned local.
    best = max(population, key=fitness)

    for gen in range(generations):
        new_population = []
        for _ in range(pop_size // 2):
            parent1, parent2 = select_parents(population)
            child1, child2 = crossover(parent1, parent2)
            new_population += [mutate(child1), mutate(child2)]
        population = new_population
        best = max(population, key=fitness)
        print(f"Generation {gen+1}: Best = {best}, Fitness = {fitness(best)}")
    return best

# Run the GA with its default settings and report the result.
best_solution = genetic_algorithm()
print(f"\nBest solution found: x = {best_solution}, f(x) = {fitness(best_solution)}")

program 10 - finite word classification using the backpropagation algorithm

 1
 2import numpy as np 
 3 
 4# Sigmoid activation and its derivative 
 5def sigmoid(x): 
 6return 1 / (1 + np.exp(-x)) 
 7def sigmoid_derivative(x): 
 8return x * (1 - x) 
 9# Training dataset: finite binary words and labels 
10X = np.array([ 
11[0, 0], 
12[0, 1], 
13[1, 0], 
14[1, 1] 
15]) 
16# Labels (for example, classify based on parity) 
17y = np.array([[0], [1], [1], [0]]) # XOR Problem 
18 
19# Seed for reproducibility 
20np.random.seed(1) 
21 
22# Initialize weights randomly with mean 0 
23input_layer_neurons = X.shape[1] 
24hidden_layer_neurons = 4 
25output_neurons = 1 
26 
27# Weights 
28hidden_weights = 2 * np.random.random((input_layer_neurons, hidden_layer_neurons)) - 1 
29output_weights = 2 * np.random.random((hidden_layer_neurons, output_neurons)) - 1 
30 
31# Learning rate 
32lr = 0.5 
33epochs = 10000 
34 
35# Training process 
36for epoch in range(epochs): 
37# Forward Propagation 
38hidden_input = np.dot(X, hidden_weights) 
39hidden_output = sigmoid(hidden_input) 
40 
41final_input = np.dot(hidden_output, output_weights) 
42predicted_output = sigmoid(final_input) 
43 
44# Error calculation 
45error = y - predicted_output 
46if epoch % 1000 == 0: 
47print(f"Epoch {epoch} Error: {np.mean(np.abs(error))}") 
48 
49# Backpropagation 
50d_predicted_output = error * sigmoid_derivative(predicted_output) 
51error_hidden_layer = d_predicted_output.dot(output_weights.T) 
52d_hidden_layer = error_hidden_layer * sigmoid_derivative(hidden_output)
53# Updating Weights 
54output_weights += hidden_output.T.dot(d_predicted_output) * lr 
55hidden_weights += X.T.dot(d_hidden_layer) * lr 
56# Final output 
57print("\nFinal Predicted Output:") 
58print(np.round(predicted_output, 3))