Image Representation
Learn how images are represented as numerical data for computer processing.
Digital Images: 2D/3D arrays of pixel values
Grayscale: Single channel with intensity values 0-255 for 8-bit images (0 = black, 255 = white)
Color (RGB): Three channels (Red, Green, Blue)
Shape: (Height, Width, Channels) in NumPy, PIL, OpenCV, and TensorFlow; PyTorch uses (Channels, Height, Width)
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
def demonstrate_image_representation():
    """Show how images are represented as NumPy arrays.

    Builds a hand-written 8x8 grayscale image, derives an RGB image from it,
    and prints the shape, dtype, value range, and the top-left 4x4 pixel
    values of both.

    Returns:
        A ``(simple_image, color_image)`` tuple: the ``(8, 8)`` grayscale
        array and the ``(8, 8, 3)`` RGB array, both of dtype ``uint8``.
    """
    print("=== IMAGE REPRESENTATION DEMO ===")

    # Create a simple 8x8 grayscale image
    simple_image = np.array(
        [
            [0, 0, 0, 0, 0, 0, 0, 0],
            [0, 255, 255, 0, 0, 255, 255, 0],
            [0, 255, 255, 0, 0, 255, 255, 0],
            [0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 255, 255, 0, 0, 0],
            [0, 0, 255, 0, 0, 255, 0, 0],
            [0, 0, 0, 255, 255, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0],
        ],
        dtype=np.uint8,
    )

    print(f"Image shape: {simple_image.shape}")
    print(f"Data type: {simple_image.dtype}")
    print(f"Min pixel value: {simple_image.min()}")
    print(f"Max pixel value: {simple_image.max()}")

    # Create a color (RGB) version: same height/width plus a trailing
    # channel axis, so the shape is (Height, Width, Channels).
    color_image = np.zeros(simple_image.shape + (3,), dtype=np.uint8)
    color_image[:, :, 0] = simple_image       # Red channel: full intensity
    color_image[:, :, 1] = simple_image // 2  # Green channel: half intensity
    color_image[:, :, 2] = simple_image // 4  # Blue channel: quarter intensity

    # "\n" (a real newline) puts a blank line before each section; the
    # doubled backslash used previously printed the two characters "\n".
    print(f"\nColor image shape: {color_image.shape}")
    print(f"Number of channels: {color_image.shape[2]}")

    # Show pixel values for a small region
    print("\n=== PIXEL VALUES (top-left 4x4) ===")
    print("Grayscale:")
    print(simple_image[:4, :4])
    print("\nRGB (Red channel):")
    print(color_image[:4, :4, 0])

    return simple_image, color_image
# Run demonstration; the returned grayscale and RGB arrays are bound at
# module level as gray_img and color_img.
gray_img, color_img = demonstrate_image_representation()

# Print a short glossary, one "term: description" line per entry.
# "\n" is a real newline here; a doubled backslash would print the two
# characters "\n" literally instead of a blank line.
print("\n=== KEY CONCEPTS ===")
concepts = {
    "Pixels": "Basic unit of digital images (picture elements)",
    "Channels": "Color components (1 for grayscale, 3 for RGB)",
    "Resolution": "Image dimensions (width x height)",
    "Bit Depth": "Number of bits per pixel (8-bit = 256 levels)",
    "Arrays": "Images stored as multi-dimensional arrays",
    "Normalization": "Often scale pixels to [0,1] range for neural networks",
}
for concept, description in concepts.items():
    print(f"{concept}: {description}")
Image Classification Challenges
Understand the fundamental challenges that make computer vision difficult.
Viewpoint Variation: Objects look different from different angles
Scale Variation: Objects appear at different sizes
Illumination: Lighting conditions affect appearance
Occlusion: Objects may be partially hidden
Intra-class Variation: Objects in the same class can look very different from one another
# Computer vision challenges demonstration
def demonstrate_cv_challenges():
"""Illustrate challenges in computer vision"""
print("=== COMPUTER VISION CHALLENGES ===")
challenges = {
"Viewpoint Variation": {
"Problem": "Same object looks different from different angles",
"Example": "A car from front vs side vs back view",
"CNN Solution": "Translation and rotation invariance through pooling"
},
"Scale Variation": {
"Problem": "Objects appear at different sizes",
"Example": "Cat close-up vs cat far away",
"CNN Solution": "Multi-scale feature detection with different filter sizes"
},
"Illumination Changes": {
"Problem":