Computer vision models learn patterns from pixels. TensorFlow/Keras makes building CNNs and using pre-trained models straightforward.

Load and Preprocess Images

  import tensorflow as tf
from tensorflow.keras import layers
import matplotlib.pyplot as plt

# Load a single image, resized on read to the 224x224 resolution used below.
img = tf.keras.utils.load_img("photo.jpg", target_size=(224, 224))

# Convert to an array and prepend a batch axis in one step: Keras models
# consume batches, so a lone image becomes a batch of size one.
img_array = tf.expand_dims(tf.keras.utils.img_to_array(img), axis=0)
print(img_array.shape)  # (1, 224, 224, 3)
  

Image Data Pipeline

  train_dir = "data/train"
val_dir = "data/validation"

IMG_SIZE = (224, 224)
BATCH_SIZE = 32

train_ds = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode="int",
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    val_dir,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode="int",
)

class_names = train_ds.class_names
print(f"Classes: {class_names}")

# Performance optimization
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().shuffle(1000).prefetch(AUTOTUNE)
val_ds = val_ds.cache().prefetch(AUTOTUNE)
  

Expected directory layout:

  data/
├── train/
│   ├── cats/
│   └── dogs/
└── validation/
    ├── cats/
    └── dogs/
  

Data Augmentation

  data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
    layers.RandomContrast(0.1),
])

# Preview augmentations
for images, labels in train_ds.take(1):
    plt.figure(figsize=(10, 10))
    for i in range(9):
        augmented = data_augmentation(images[0:1], training=True)
        plt.subplot(3, 3, i + 1)
        plt.imshow(augmented[0].numpy().astype("uint8"))
        plt.axis("off")
    plt.show()
  

Build a CNN from Scratch

  from tensorflow import keras

# Three identical Conv -> MaxPool stages that differ only in filter count.
conv_stack = []
for filters in (32, 64, 128):
    conv_stack.append(layers.Conv2D(filters, 3, activation="relu"))
    conv_stack.append(layers.MaxPooling2D())

model = keras.Sequential([
    # Augmentation and rescaling live inside the model, so callers feed it
    # raw pixel values.
    data_augmentation,
    layers.Rescaling(1./255),
    *conv_stack,
    # Classifier head: one output unit per class.
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(128, activation="relu"),
    layers.Dense(len(class_names), activation="softmax"),
])

# Sparse categorical cross-entropy matches the integer labels produced by
# label_mode="int" in the data pipeline.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)
  

Train

  history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=20,
    callbacks=[
        keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True),
        keras.callbacks.ModelCheckpoint("best_cnn.keras", save_best_only=True),
    ],
)
  

Transfer Learning with MobileNetV2

Use a model pre-trained on ImageNet — on small datasets this usually works much better than training a CNN from scratch:

  base_model = keras.applications.MobileNetV2(
    input_shape=IMG_SIZE + (3,),
    include_top=False,
    weights="imagenet",
)
base_model.trainable = False

inputs = keras.Input(shape=IMG_SIZE + (3,))
x = data_augmentation(inputs)
x = keras.applications.mobilenet_v2.preprocess_input(x)
x = base_model(x, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.2)(x)
outputs = layers.Dense(len(class_names), activation="softmax")(x)

model = keras.Model(inputs, outputs)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0001),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

model.fit(train_ds, validation_data=val_ds, epochs=10)
  

Inference on New Images

  def predict_image(path):
    img = tf.keras.utils.load_img(path, target_size=IMG_SIZE)
    arr = tf.keras.utils.img_to_array(img)
    arr = tf.expand_dims(arr, 0)
    arr = keras.applications.mobilenet_v2.preprocess_input(arr)

    preds = model.predict(arr, verbose=0)
    idx = preds.argmax()
    confidence = preds[0][idx]
    return class_names[idx], confidence

label, conf = predict_image("test_cat.jpg")
print(f"{label} ({conf:.1%})")
  

Export for Mobile (TFLite)

  # Convert the in-memory Keras model directly, so no SavedModel directory
# has to exist on disk first.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# Enable the converter's default optimizations.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# convert() returns the serialized model as bytes; write it in binary mode.
with open("model.tflite", "wb") as f:
    f.write(tflite_model)
  

Deploy TFLite models on Android, iOS, and edge devices.

CNN Architecture Comparison

| Model          | Size   | Speed  | Accuracy  |
| -------------- | ------ | ------ | --------- |
| Custom CNN     | Small  | Fast   | Moderate  |
| MobileNetV2    | Medium | Fast   | Good      |
| EfficientNetB0 | Medium | Medium | Very Good |
| ResNet50       | Large  | Slower | Excellent |

Transfer learning with MobileNetV2 or EfficientNet is the fastest path to production-quality image classifiers.