What is Unsupervised Learning
Learn the fundamental concept of extracting patterns from data without labeled examples.
No Labels
Pattern Discovery
Hidden Structure
Unsupervised learning finds hidden patterns, structures, and relationships in data without explicit target variables or labels, letting the algorithm discover what's interesting on its own.
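A minimal sketch of the idea (using scikit-learn's KMeans purely for illustration): the model is fit on the features alone and still recovers the grouping, because no labels are ever supplied.
# Toy illustration: the algorithm only ever sees X, never any labels
import numpy as np
from sklearn.cluster import KMeans
X = np.array([[1.0, 1.1], [0.9, 1.0], [1.1, 0.9],   # one natural group
              [5.0, 5.2], [5.1, 4.9], [4.8, 5.0]])  # another natural group
groups = KMeans(n_clusters=2, random_state=0, n_init=10).fit_predict(X)
print(groups)  # e.g. [0 0 0 1 1 1] -- the structure is discovered, not given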
Supervised vs Unsupervised
Understand the key differences between supervised and unsupervised learning approaches.
Supervised Learning: Uses labeled data (X, y) to learn a mapping function
Unsupervised Learning: Uses only input data (X) to discover patterns
Goal: Prediction vs Discovery
# Comparison of supervised vs unsupervised learning
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
# Generate sample data
X, y_true = make_blobs(n_samples=300, centers=4, random_state=42)
print("=== SUPERVISED LEARNING ===")
# We have both features (X) and labels (y_true)
supervised_model = LogisticRegression()
supervised_model.fit(X, y_true)
supervised_predictions = supervised_model.predict(X)
print(f"Supervised accuracy: {(supervised_predictions == y_true).mean():.3f}")
print("\\n=== UNSUPERVISED LEARNING ===")
# We only have features (X), no labels!
unsupervised_model = KMeans(n_clusters=4, random_state=42)
unsupervised_predictions = unsupervised_model.fit_predict(X)
# We can't directly compare to y_true in real unsupervised scenarios
# This is just for demonstration
print(f"Found {len(np.unique(unsupervised_predictions))} clusters")
print(f"Cluster centers:\\n{unsupervised_model.cluster_centers_}")
print("\\n=== KEY DIFFERENCES ====")
differences = {
    "Data": {
        "Supervised": "Features + Labels (X, y)",
        "Unsupervised": "Features only (X)"
    },
    "Goal": {
        "Supervised": "Predict labels for new data",
        "Unsupervised": "Discover hidden patterns"
    },
    "Evaluation": {
        "Supervised": "Compare predictions to true labels",
        "Unsupervised": "Assess pattern quality/coherence"
    }
}
for aspect, comparison in differences.items():
    print(f"\n{aspect}:")
    for approach, description in comparison.items():
        print(f"  {approach}: {description}")
Types of Unsupervised Tasks
Explore the main categories of unsupervised learning problems and their applications.
Clustering: Group similar data points together
Dimensionality Reduction: Reduce feature space while preserving information
Association Rules: Find relationships between items (a hand-rolled sketch appears at the end of the example code below)
Anomaly Detection: Identify unusual or outlier data points
# Examples of different unsupervised learning tasks
import numpy as np
from sklearn.datasets import make_blobs, load_digits
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.ensemble import IsolationForest
print("=== CLUSTERING EXAMPLE ===")
# Generate data with natural clusters
X_clusters, _ = make_blobs(n_samples=200, centers=3, random_state=42)
kmeans = KMeans(n_clusters=3, random_state=42)
cluster_labels = kmeans.fit_predict(X_clusters)
print(f"Found {len(np.unique(cluster_labels))} clusters")
print(f"Cluster sizes: {np.bincount(cluster_labels)}")
print("\\n=== DIMENSIONALITY REDUCTION EXAMPLE ===")
# Load high-dimensional data (digits: 64 features)
digits = load_digits()
X_digits = digits.data
print(f"Original dimensions: {X_digits.shape}")
# Reduce to 2 dimensions
pca = PCA(n_components=2)
X_reduced = pca.fit_transform(X_digits)
print(f"Reduced dimensions: {X_reduced.shape}")
print(f"Variance explained: {pca.explained_variance_ratio_.sum():.3f}")
print("\\n=== ANOMALY DETECTION EXAMPLE ===")
# Generate normal data with some outliers
np.random.seed(42)
X_normal = np.random.normal(0, 1, (100, 2))
X_outliers = np.random.uniform(-4, 4, (10, 2))
X_mixed = np.vstack([X_normal, X_outliers])
# Detect anomalies
iso_forest = IsolationForest(contamination=0.1, random_state=42)
anomaly_labels = iso_forest.fit_predict(X_mixed)
n_anomalies = (anomaly_labels == -1).sum()
print(f"Detected {n_anomalies} anomalies out of {len(X_mixed)} points")
print("\\n=== TASK SUMMARY ===")
task_descriptions = {
    "Clustering": "Group customers by purchasing behavior",