On this page
article
Computer Vision with TensorFlow
Build image classification and CNN models with TensorFlow/Keras — data pipelines, augmentation, transfer learning, and inference.
Computer vision models learn patterns from pixels. TensorFlow/Keras makes building CNNs and using pre-trained models straightforward.
Load and Preprocess Images
import tensorflow as tf
from tensorflow.keras import layers
import matplotlib.pyplot as plt
# Load one image from disk, resized to 224x224, and turn it into a
# single-item batch.
image_path = "photo.jpg"
img = tf.keras.utils.load_img(image_path, target_size=(224, 224))
# Prepend a batch axis to the image array.
img_array = tf.expand_dims(tf.keras.utils.img_to_array(img), 0)
print(img_array.shape)  # (1, 224, 224, 3)
Image Data Pipeline
# Dataset locations, plus the image size and batch size shared by both splits.
train_dir = "data/train"
val_dir = "data/validation"
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

# Both splits are loaded with identical settings; label_mode="int" requests
# integer labels.
_loader_options = dict(
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode="int",
)
train_ds = tf.keras.utils.image_dataset_from_directory(train_dir, **_loader_options)
val_ds = tf.keras.utils.image_dataset_from_directory(val_dir, **_loader_options)

# Capture the class names from the dataset object returned by the loader.
class_names = train_ds.class_names
print(f"Classes: {class_names}")

# Performance optimization: cache and prefetch both splits; only the
# training split is shuffled (buffer size 1000).
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache()
train_ds = train_ds.shuffle(1000)
train_ds = train_ds.prefetch(AUTOTUNE)
val_ds = val_ds.cache()
val_ds = val_ds.prefetch(AUTOTUNE)
Expected directory layout:
data/
├── train/
│   ├── cats/
│   └── dogs/
└── validation/
    ├── cats/
    └── dogs/
Data Augmentation
# Random augmentation pipeline: horizontal flip, then rotation, zoom and
# contrast jitter, each with a factor of 0.1.
# Spelled tf.keras here because the imports at the top of the page bring in
# `tf` and `layers` only; a bare `keras` name is not yet defined at this point.
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
    layers.RandomContrast(0.1),
])
# Preview augmentations: draw nine random augmentations of the first image
# of one training batch in a 3x3 grid.
for batch_images, _ in train_ds.take(1):
    # Slice rather than index so the batch axis is kept.
    sample = batch_images[0:1]
    plt.figure(figsize=(10, 10))
    for cell in range(1, 10):
        # training=True requests training-mode behavior from the
        # augmentation layers.
        augmented = data_augmentation(sample, training=True)
        plt.subplot(3, 3, cell)
        plt.imshow(augmented[0].numpy().astype("uint8"))
        plt.axis("off")
    plt.show()
Build a CNN from Scratch
from tensorflow import keras
# Small CNN: augmentation and 1/255 rescaling, three Conv2D + MaxPooling2D
# stages (32, 64 and 128 filters, 3x3 kernels), then a dense classifier
# with dropout.
conv_stages = []
for n_filters in (32, 64, 128):
    conv_stages.append(layers.Conv2D(n_filters, 3, activation="relu"))
    conv_stages.append(layers.MaxPooling2D())

classifier_head = [
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(128, activation="relu"),
    # One softmax output per entry in class_names.
    layers.Dense(len(class_names), activation="softmax"),
]

model = keras.Sequential(
    [data_augmentation, layers.Rescaling(1./255), *conv_stages, *classifier_head]
)

compile_options = {
    "optimizer": "adam",
    "loss": "sparse_categorical_crossentropy",
    "metrics": ["accuracy"],
}
model.compile(**compile_options)
Train
# Train for up to 20 epochs with two callbacks: early stopping (patience 3,
# restoring the best weights) and a checkpoint that writes "best_cnn.keras"
# with save_best_only enabled.
early_stopping = keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)
checkpoint = keras.callbacks.ModelCheckpoint("best_cnn.keras", save_best_only=True)

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=20,
    callbacks=[early_stopping, checkpoint],
)
Transfer Learning with MobileNetV2
Use a model pre-trained on ImageNet — this usually works much better than training from scratch when your dataset is small:
# MobileNetV2 feature extractor (ImageNet weights, no classification top),
# kept non-trainable, with a new pooling + dropout + softmax head on top.
INPUT_SHAPE = IMG_SIZE + (3,)

base_model = keras.applications.MobileNetV2(
    input_shape=INPUT_SHAPE,
    include_top=False,
    weights="imagenet",
)
base_model.trainable = False

# Functional graph: augmentation -> MobileNetV2 preprocessing -> base model
# -> global average pooling -> dropout -> softmax over the classes.
inputs = keras.Input(shape=INPUT_SHAPE)
augmented = data_augmentation(inputs)
preprocessed = keras.applications.mobilenet_v2.preprocess_input(augmented)
# The base model is always called with training=False.
features = base_model(preprocessed, training=False)
pooled = layers.GlobalAveragePooling2D()(features)
regularized = layers.Dropout(0.2)(pooled)
outputs = layers.Dense(len(class_names), activation="softmax")(regularized)
model = keras.Model(inputs, outputs)

model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.Adam(learning_rate=0.0001),
    metrics=["accuracy"],
)
model.fit(train_ds, validation_data=val_ds, epochs=10)
Inference on New Images
def predict_image(path):
    """Classify the image at ``path`` with the global ``model``.

    The image is resized to ``IMG_SIZE`` and passed to the model as raw
    pixel values. The transfer-learning model built above already applies
    ``mobilenet_v2.preprocess_input`` inside its own graph, so calling it
    here as well would scale the pixels twice.

    Args:
        path: Location of the image file to classify.

    Returns:
        A ``(label, confidence)`` tuple: the entry of ``class_names`` with
        the highest predicted score, and that score.
    """
    img = tf.keras.utils.load_img(path, target_size=IMG_SIZE)
    arr = tf.keras.utils.img_to_array(img)
    arr = tf.expand_dims(arr, 0)  # batch of one
    # Take the single row of the prediction batch before locating the best
    # class, so the index is always relative to the class axis.
    probs = model.predict(arr, verbose=0)[0]
    idx = int(probs.argmax())
    return class_names[idx], probs[idx]


label, conf = predict_image("test_cat.jpg")
print(f"{label} ({conf:.1%})")
Export for Mobile (TFLite)
# Write the trained model out as a SavedModel first: the converter below
# reads the "saved_model" directory, which nothing earlier on this page creates.
model.export("saved_model")

# Convert the SavedModel to TFLite with the default optimizations enabled.
converter = tf.lite.TFLiteConverter.from_saved_model("saved_model")
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# Write the converted model to disk in binary mode.
with open("model.tflite", "wb") as f:
    f.write(tflite_model)
Deploy TFLite models on Android, iOS, and edge devices.
CNN Architecture Comparison
| Model | Size | Speed | Accuracy |
|---|---|---|---|
| Custom CNN | Small | Fast | Moderate |
| MobileNetV2 | Medium | Fast | Good |
| EfficientNetB0 | Medium | Medium | Very Good |
| ResNet50 | Large | Slower | Excellent |
Related Chapters
Transfer learning with MobileNetV2 or EfficientNet is the fastest path to production-quality image classifiers.