🔮 Unsupervised Learning

Discover hidden patterns, cluster data, and reduce dimensions without labeled examples

Unsupervised Learning Curriculum

12 Core Units · ~70 Key Concepts · 15+ Algorithms · 30+ Practical Examples

Unit 1: Introduction to Unsupervised Learning

Understand the fundamentals of learning from unlabeled data and pattern discovery.

  • What is unsupervised learning
  • Supervised vs unsupervised
  • Pattern discovery
  • Hidden structure exploration
  • Types of unsupervised tasks
  • Challenges and opportunities
  • Evaluation difficulties
  • Real-world applications

Unit 2: K-Means Clustering

Master the most popular clustering algorithm for grouping similar data points.

  • K-means algorithm steps
  • Centroid initialization
  • Assignment and update phases
  • Convergence criteria
  • Choosing optimal K
  • Elbow method
  • K-means++ initialization
  • Limitations and assumptions
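
A minimal sketch of this unit's core loop, assuming scikit-learn; the blob data and the range of K are illustrative. It uses k-means++ initialization and scans K for an elbow in the inertia curve.
# Elbow-method sketch: scan K and watch inertia (within-cluster sum of squares)
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=300, centers=4, random_state=42)

for k in range(1, 8):
    km = KMeans(n_clusters=k, init="k-means++", n_init=10, random_state=42)
    km.fit(X)
    print(f"K={k}: inertia={km.inertia_:.1f}")
# The "elbow" is where inertia stops dropping sharply (here, around K=4)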

Unit 3: Hierarchical Clustering

Build tree-like cluster structures with agglomerative and divisive methods.

  • Agglomerative clustering
  • Divisive clustering
  • Linkage criteria
  • Distance metrics
  • Dendrograms
  • Cutting dendrograms
  • Computational complexity
  • Advantages over K-means
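
A minimal agglomerative sketch, assuming SciPy; the Ward linkage and the three-cluster cut are illustrative choices.
# Build the merge tree bottom-up, then cut the dendrogram into flat clusters
import numpy as np
from scipy.cluster.hierarchy import linkage, fcluster
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=50, centers=3, random_state=42)

Z = linkage(X, method="ward")                     # agglomerative merge tree
labels = fcluster(Z, t=3, criterion="maxclust")   # cut into 3 flat clusters
print(f"Cluster sizes: {np.bincount(labels)[1:]}")  # fcluster labels start at 1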

Unit 4: DBSCAN and Density-Based Clustering

Discover clusters of arbitrary shape using density-based approaches.

  • Density-based clustering concept
  • DBSCAN algorithm
  • Core points and neighborhoods
  • Epsilon and MinPts parameters
  • Noise point detection
  • Arbitrary cluster shapes
  • OPTICS algorithm
  • Parameter selection strategies
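
A minimal DBSCAN sketch, assuming scikit-learn; the eps and min_samples values are illustrative and normally need tuning (e.g., via a k-distance plot).
# DBSCAN on interleaving half-moons: non-convex shapes K-means cannot separate
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.datasets import make_moons

X, _ = make_moons(n_samples=200, noise=0.05, random_state=42)

labels = DBSCAN(eps=0.3, min_samples=5).fit_predict(X)

n_clusters = len(set(labels)) - (1 if -1 in labels else 0)  # -1 marks noise
print(f"Clusters found: {n_clusters}, noise points: {np.sum(labels == -1)}")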

Unit 5: Gaussian Mixture Models

Model data as a mixture of Gaussian distributions for soft clustering.

  • Mixture model concept
  • Gaussian distributions
  • Expectation-Maximization (EM)
  • Soft clustering assignments
  • Model parameters
  • BIC and AIC criteria
  • Initialization strategies
  • Applications and extensions
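
A minimal Gaussian-mixture sketch, assuming scikit-learn; the candidate component counts compared via BIC are illustrative.
# Fit mixtures of different sizes, compare BIC, then inspect soft assignments
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.mixture import GaussianMixture

X, _ = make_blobs(n_samples=300, centers=3, random_state=42)

for k in (2, 3, 4):
    gmm = GaussianMixture(n_components=k, random_state=42).fit(X)
    print(f"k={k}: BIC={gmm.bic(X):.1f}")  # lower BIC is better

best = GaussianMixture(n_components=3, random_state=42).fit(X)
print(np.round(best.predict_proba(X[:3]), 3))  # soft memberships per point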

Unit 6: Principal Component Analysis

Reduce dimensionality while preserving variance using linear transformations.

  • Dimensionality reduction motivation
  • Principal components
  • Eigenvalues and eigenvectors
  • Variance explained
  • Scree plots
  • Data standardization
  • PCA limitations
  • Interpretation and visualization
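
A minimal PCA sketch, assuming scikit-learn; the digits dataset and the choice of 10 components are illustrative.
# Standardize first (PCA is scale-sensitive), then inspect variance explained
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

X = load_digits().data                     # 64 features per image
X_std = StandardScaler().fit_transform(X)

pca = PCA(n_components=10).fit(X_std)
for i, ratio in enumerate(pca.explained_variance_ratio_, start=1):
    print(f"PC{i}: {ratio:.3f}")
print(f"Total variance kept: {pca.explained_variance_ratio_.sum():.3f}")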

Unit 7: t-SNE and Manifold Learning

Visualize high-dimensional data and discover non-linear structures.

  • Non-linear dimensionality reduction
  • t-SNE algorithm
  • Perplexity parameter
  • Local vs global structure
  • UMAP algorithm
  • Manifold learning concept
  • Visualization best practices
  • Interpretation guidelines
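
A minimal t-SNE sketch, assuming scikit-learn; the subsample size and perplexity value are illustrative.
# Embed high-dimensional digits into 2D; perplexity ~ effective neighbor count
from sklearn.datasets import load_digits
from sklearn.manifold import TSNE

X = load_digits().data[:500]  # subsample: t-SNE scales poorly with n

emb = TSNE(n_components=2, perplexity=30, random_state=42).fit_transform(X)
print(emb.shape)  # (500, 2): coordinates ready for a scatter plot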

Unit 8: Association Rule Mining

Discover frequent patterns and associations in transactional data.

  • Market basket analysis
  • Support, confidence, lift
  • Apriori algorithm
  • Frequent itemsets
  • Association rules
  • FP-Growth algorithm
  • Rule evaluation metrics
  • Applications beyond retail
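
A minimal Apriori sketch, assuming the third-party mlxtend package (not part of scikit-learn); the toy transactions and thresholds are illustrative.
# One-hot encode transactions, mine frequent itemsets, then derive rules
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

transactions = [
    ["bread", "milk"],
    ["bread", "diapers", "beer"],
    ["milk", "diapers", "beer"],
    ["bread", "milk", "diapers"],
]

te = TransactionEncoder()
df = pd.DataFrame(te.fit(transactions).transform(transactions),
                  columns=te.columns_)

itemsets = apriori(df, min_support=0.5, use_colnames=True)
rules = association_rules(itemsets, metric="confidence", min_threshold=0.6)
print(rules[["antecedents", "consequents", "support", "confidence", "lift"]])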

Unit 9: Anomaly Detection

Identify outliers and unusual patterns in data for fraud detection and monitoring.

  • Anomaly detection types
  • Statistical approaches
  • Isolation Forest
  • One-Class SVM
  • Local Outlier Factor
  • Novelty vs outlier detection
  • Evaluation challenges
  • Domain-specific applications
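
A minimal Local Outlier Factor sketch, assuming scikit-learn; the synthetic data and contamination rate are illustrative (Isolation Forest is demonstrated in Unit 1 below).
# LOF flags points whose local density is much lower than their neighbors'
import numpy as np
from sklearn.neighbors import LocalOutlierFactor

rng = np.random.default_rng(42)
X = np.vstack([rng.normal(0, 1, size=(100, 2)),    # dense inlier cloud
               rng.uniform(-6, 6, size=(5, 2))])   # scattered outliers

labels = LocalOutlierFactor(n_neighbors=20, contamination=0.05).fit_predict(X)
print(f"Flagged {np.sum(labels == -1)} of {len(X)} points as outliers")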

Unit 10: Topic Modeling

Extract themes and topics from text collections using probabilistic models.

  • Topic modeling concept
  • Latent Dirichlet Allocation
  • Document-topic distributions
  • Topic-word distributions
  • Gibbs sampling
  • Model selection
  • Topic coherence
  • Applications and interpretation
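
A minimal LDA sketch, assuming scikit-learn, whose implementation uses online variational inference rather than Gibbs sampling; the toy corpus and two-topic choice are illustrative.
# Vectorize a corpus, fit LDA, then read top words off the topic-word matrix
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer

docs = [
    "the cat sat on the mat with another cat",
    "dogs and cats make friendly pets",
    "stocks fell as markets dropped sharply",
    "investors bought shares and bonds",
]

vec = CountVectorizer(stop_words="english")
counts = vec.fit_transform(docs)

lda = LatentDirichletAllocation(n_components=2, random_state=42).fit(counts)

terms = vec.get_feature_names_out()
for t, weights in enumerate(lda.components_):       # topic-word weights
    top = [terms[i] for i in weights.argsort()[-3:][::-1]]
    print(f"Topic {t}: {top}")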

Unit 11: Clustering Evaluation

Assess clustering quality using internal and external validation measures.

  • Internal validation metrics
  • Silhouette analysis
  • Calinski-Harabasz index
  • Davies-Bouldin index
  • External validation metrics
  • Adjusted Rand Index
  • Normalized Mutual Information
  • Choosing evaluation strategy
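
A minimal evaluation sketch, assuming scikit-learn; it contrasts internal metrics (geometry only) with an external metric that needs ground-truth labels.
# Internal metrics need only X and the labels; ARI also needs the true labels
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import (adjusted_rand_score, calinski_harabasz_score,
                             davies_bouldin_score, silhouette_score)

X, y_true = make_blobs(n_samples=300, centers=4, random_state=42)
labels = KMeans(n_clusters=4, random_state=42).fit_predict(X)

print(f"Silhouette (higher is better): {silhouette_score(X, labels):.3f}")
print(f"Calinski-Harabasz (higher is better): {calinski_harabasz_score(X, labels):.1f}")
print(f"Davies-Bouldin (lower is better): {davies_bouldin_score(X, labels):.3f}")
print(f"Adjusted Rand Index vs ground truth: {adjusted_rand_score(y_true, labels):.3f}")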

Unit 12: Advanced Topics and Applications

Explore cutting-edge techniques and real-world unsupervised learning applications.

  • Autoencoders for representation
  • Generative Adversarial Networks
  • Semi-supervised learning
  • Self-supervised learning
  • Recommendation systems
  • Feature learning
  • Representation learning
  • Industry case studies

Unit 1: Introduction to Unsupervised Learning

Understand the fundamentals of learning from unlabeled data and pattern discovery.

What is Unsupervised Learning

Learn the fundamental concept of extracting patterns from data without labeled examples.

Key ideas: No Labels · Pattern Discovery · Hidden Structure
Unsupervised learning finds hidden patterns, structures, and relationships in data without explicit target variables or labels, letting the algorithm discover what's interesting on its own.

Supervised vs Unsupervised

Understand the key differences between supervised and unsupervised learning approaches.

Supervised Learning: Uses labeled data (X, y) to learn mapping function
Unsupervised Learning: Uses only input data (X) to discover patterns
Goal: Prediction vs Discovery
# Comparison of supervised vs unsupervised learning
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression

# Generate sample data
X, y_true = make_blobs(n_samples=300, centers=4, random_state=42)

print("=== SUPERVISED LEARNING ===")
# We have both features (X) and labels (y_true)
supervised_model = LogisticRegression()
supervised_model.fit(X, y_true)
supervised_predictions = supervised_model.predict(X)
print(f"Supervised accuracy: {(supervised_predictions == y_true).mean():.3f}")

print("\\n=== UNSUPERVISED LEARNING ===")
# We only have features (X), no labels!
unsupervised_model = KMeans(n_clusters=4, random_state=42)
unsupervised_predictions = unsupervised_model.fit_predict(X)

# We can't directly compare to y_true in real unsupervised scenarios
# This is just for demonstration
print(f"Found {len(np.unique(unsupervised_predictions))} clusters")
print(f"Cluster centers:\\n{unsupervised_model.cluster_centers_}")

print("\\n=== KEY DIFFERENCES ====")
differences = {
  "Data": {
    "Supervised": "Features + Labels (X, y)",
    "Unsupervised": "Features only (X)"
  },
  "Goal": {
    "Supervised": "Predict labels for new data",
    "Unsupervised": "Discover hidden patterns"
  },
  "Evaluation": {
    "Supervised": "Compare predictions to true labels",
    "Unsupervised": "Assess pattern quality/coherence"
  }
}

for aspect, comparison in differences.items():
  print(f"\\n{aspect}:")
  for approach, description in comparison.items():
    print(f" {approach}: {description}")

Types of Unsupervised Tasks

Explore the main categories of unsupervised learning problems and their applications.

Clustering: Group similar data points together
Dimensionality Reduction: Reduce feature space while preserving information
Association Rules: Find relationships between items
Anomaly Detection: Identify unusual or outlier data points
# Examples of different unsupervised learning tasks
import numpy as np
from sklearn.datasets import make_blobs, load_digits
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.ensemble import IsolationForest

print("=== CLUSTERING EXAMPLE ===")
# Generate data with natural clusters
X_clusters, _ = make_blobs(n_samples=200, centers=3, random_state=42)
kmeans = KMeans(n_clusters=3, random_state=42)
cluster_labels = kmeans.fit_predict(X_clusters)
print(f"Found {len(np.unique(cluster_labels))} clusters")
print(f"Cluster sizes: {np.bincount(cluster_labels)}")

print("\\n=== DIMENSIONALITY REDUCTION EXAMPLE ===")
# Load high-dimensional data (digits: 64 features)
digits = load_digits()
X_digits = digits.data
print(f"Original dimensions: {X_digits.shape}")

# Reduce to 2 dimensions
pca = PCA(n_components=2)
X_reduced = pca.fit_transform(X_digits)
print(f"Reduced dimensions: {X_reduced.shape}")
print(f"Variance explained: {pca.explained_variance_ratio_.sum():.3f}")

print("\\n=== ANOMALY DETECTION EXAMPLE ===")
# Generate normal data with some outliers
np.random.seed(42)
X_normal = np.random.normal(0, 1, (100, 2))
X_outliers = np.random.uniform(-4, 4, (10, 2))
X_mixed = np.vstack([X_normal, X_outliers])

# Detect anomalies
iso_forest = IsolationForest(contamination=0.1, random_state=42)
anomaly_labels = iso_forest.fit_predict(X_mixed)

n_anomalies = (anomaly_labels == -1).sum()
print(f"Detected {n_anomalies} anomalies out of {len(X_mixed)} points")

print("\\n=== TASK SUMMARY ===")
task_descriptions = {
  "Clustering": "Group customers by purchasing behavior",