OpenCV
Complete Guide
Har function kya karta hai aur use kaise karte hain — code ke saath, Hinglish mein samjhaya gaya hai.
Getting Started — Basics
Import aur image read karna — ye sab seedha seekho
cv2.imread()
Disk se image read karta hai. By default BGR format mein load karta hai — RGB nahi! Isko samajhna zaroori hai.
import cv2 as cv

# Plain read: the image is loaded in BGR channel order (not RGB).
img = cv.imread('photo.jpg')
# Verify the load before using the result; a failed read yields None.
if img is None:
    print("Image nahi mili! Path check karo")

# Read the same file as grayscale.
gray = cv.imread('photo.jpg', cv.IMREAD_GRAYSCALE)

# Read a PNG together with its alpha channel.
rgba = cv.imread('icon.png', cv.IMREAD_UNCHANGED)
cv2.imread(filename, flags)
Ye function disk se ek image file ko padhta hai aur NumPy array return karta hai.
| Parameter | Type | Description |
|---|---|---|
| filename | str | Image ka path (relative ya absolute) |
| flags | int | IMREAD_COLOR (1), IMREAD_GRAYSCALE (0), IMREAD_UNCHANGED (-1) |
cv2.imshow()
Image ko ek window mein display karta hai. Iske baad waitKey() lagana zaroori hai, warna window flash karke band ho jayegi.
img = cv.imread('photo.jpg')

# One-shot display: waitKey(0) waits indefinitely for a key press.
cv.imshow('Meri Image', img)
cv.waitKey(0)
cv.destroyAllWindows()

# Alternative: keep redrawing the window until 'q' is pressed.
quit_key = ord('q')
cv.imshow('img', img)
while cv.waitKey(1) != quit_key:
    cv.imshow('img', img)
cv.destroyAllWindows()
cv2.imshow(winname, mat)
Ek named window mein image render karta hai. Window automatically resize hoti hai image ke hisaab se.
| Parameter | Type | Description |
|---|---|---|
| winname | str | Window ka naam (title bar mein dikhai deta hai) |
| mat | ndarray | Show karne wala image (NumPy array) |
cv2.imwrite()
Processed image ko disk pe save karta hai. Extension se automatically format decide hota hai.
# Save as JPEG; the format is picked from the file extension.
cv.imwrite('output.jpg', img)

# Save as JPEG with an explicit quality (0-100).
jpeg_options = [cv.IMWRITE_JPEG_QUALITY, 95]
cv.imwrite('output.jpg', img, jpeg_options)

# Save as PNG with an explicit compression level (0-9).
png_options = [cv.IMWRITE_PNG_COMPRESSION, 5]
cv.imwrite('output.png', img, png_options)

# imwrite returns a boolean; capture it to confirm the save worked.
success = cv.imwrite('out.jpg', img)
print("Saved:", success)  # True/False
cv2.imwrite(filename, img, params)
Image ko file mein save karta hai. Return value True/False hai — hamesha check karo!
| Parameter | Type | Description |
|---|---|---|
| filename | str | Save karne ki jagah + naam |
| img | ndarray | Save karne wala image |
| params | list | Optional: quality/compression params |
Image Operations
Color spaces, resize, crop, drawing — sab kuch
cv2.cvtColor()
Image ka color space badalta hai. BGR↔RGB, BGR→Gray, BGR→HSV — sab possible hai.
import numpy as np

# BGR -> grayscale.
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
# BGR -> RGB (the order Matplotlib expects).
rgb = cv.cvtColor(img, cv.COLOR_BGR2RGB)
# BGR -> HSV (convenient for color detection).
hsv = cv.cvtColor(img, cv.COLOR_BGR2HSV)

# Pick out one color in HSV: inRange builds a mask of the pixels that fall
# between the lower and upper bound.
blue_lo, blue_hi = np.array([100, 50, 50]), np.array([130, 255, 255])
mask = cv.inRange(hsv, blue_lo, blue_hi)
cv2.resize()
Image ka size badhao ya ghatao. Fixed (width, height) do ya scale factor (fx, fy) — dono tareekon se kaam chalta hai.
# Resize to a fixed target size.
resized = cv.resize(img, (640, 480))

# Shrink to 50% by computing the target size from the current shape.
h, w = img.shape[:2]
half_size = (w // 2, h // 2)
small = cv.resize(img, half_size)

# Shrink to 50% by passing scale factors instead of a target size.
scaled = cv.resize(
    img,
    None,
    fx=0.5,
    fy=0.5,
    interpolation=cv.INTER_LINEAR,
)

# Crop with NumPy slicing; indexing order is [y1:y2, x1:x2].
rows, cols = slice(100, 300), slice(50, 250)
crop = img[rows, cols]
Drawing Functions
Image pe lines, circles, rectangles, text — sab draw kar sakte ho directly.
# Color tuples used below, in the (B, G, R) order of a default-loaded image.
green = (0, 255, 0)
blue = (255, 0, 0)
red = (0, 0, 255)
white = (255, 255, 255)

# Rectangle (the kind of box drawn around detected faces), thickness 2.
cv.rectangle(img, (50, 50), (200, 200), green, 2)

# Circle; a thickness of -1 means filled.
cv.circle(img, (150, 150), 50, blue, -1)

# Line, thickness 3.
cv.line(img, (0, 0), (400, 400), red, 3)

# Text.
cv.putText(img, 'Hello!', (50, 50), cv.FONT_HERSHEY_SIMPLEX, 1, white, 2)
Filtering & Blur
Noise hatao, smooth karo, edges nikalo
Blur Types
Teen main types hain — Gaussian sabse common hai, Median salt & pepper noise hatane ke liye, aur Bilateral edges preserve karta hai. Inke alawa simple average blur (cv.blur) bhi hai.
kernel = (5, 5)

# Gaussian blur - the most commonly used one.
blur = cv.GaussianBlur(img, kernel, 0)

# Median blur - for salt-and-pepper noise.
med = cv.medianBlur(img, 5)

# Bilateral filter - smooths while keeping edges.
bil = cv.bilateralFilter(img, 9, 75, 75)

# Simple average (box) blur.
avg = cv.blur(img, kernel)
Thresholding
Image ko black & white banata hai ek value ke basis pe. OCR, object detection mein use hota hai.
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
max_value = 255

# Simple threshold: pixels above 127 become white.
_, thresh = cv.threshold(gray, 127, max_value, cv.THRESH_BINARY)

# Otsu: the threshold value is chosen automatically.
otsu_mode = cv.THRESH_BINARY | cv.THRESH_OTSU
_, otsu = cv.threshold(gray, 0, max_value, otsu_mode)

# Adaptive threshold: intended for unevenly lit images.
adapt = cv.adaptiveThreshold(
    gray,
    max_value,
    cv.ADAPTIVE_THRESH_GAUSSIAN_C,
    cv.THRESH_BINARY,
    11,
    2,
)
Edge Detection
Image mein edges (borders) dhundhta hai. Canny sabse popular aur reliable hai.
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

# Canny edge detection with lower threshold 100 and upper threshold 200.
lower_thresh, upper_thresh = 100, 200
edges = cv.Canny(gray, lower_thresh, upper_thresh)

# Sobel gradients along x and along y, computed in 64-bit float.
out_depth = cv.CV_64F
sobelX = cv.Sobel(gray, out_depth, 1, 0)
sobelY = cv.Sobel(gray, out_depth, 0, 1)

# Laplacian.
lap = cv.Laplacian(gray, out_depth)
Video & Camera
Webcam se live feed, video files process karna
VideoCapture
Webcam ya video file se frames lena. 0 = pehla webcam, path = video file.
# Open the webcam (index 0 = first camera).
cap = cv.VideoCapture(0)
quit_key = ord('q')

while cap.isOpened():
    grabbed, frame = cap.read()
    if not grabbed:
        break

    # Per-frame processing: convert to grayscale and display it.
    cv.imshow('Webcam', cv.cvtColor(frame, cv.COLOR_BGR2GRAY))

    if cv.waitKey(1) == quit_key:
        break

# Release the camera and close the display window.
cap.release()
cv.destroyAllWindows()
cap.set() — Camera Properties
Camera ki settings change kar sakte ho jaise resolution, FPS wagera.
# Requested capture settings: 1280x720 resolution at 30 FPS.
requested_settings = (
    (cv.CAP_PROP_FRAME_WIDTH, 1280),
    (cv.CAP_PROP_FRAME_HEIGHT, 720),
    (cv.CAP_PROP_FPS, 30),
)
for prop_id, prop_value in requested_settings:
    cap.set(prop_id, prop_value)

# Read back the FPS currently reported by the capture.
fps = cap.get(cv.CAP_PROP_FPS)
print(f"FPS: {fps}")
VideoWriter
Processed frames ko video file mein save karo. Live processing ka output record karne ke liye.
# Record frames from an already-opened capture `cap` into an MP4 file.
fourcc = cv.VideoWriter_fourcc(*'mp4v')

# Take the frame size from the capture instead of hard-coding (640, 480):
# the writer expects every frame to match the size it was opened with.
frame_size = (
    int(cap.get(cv.CAP_PROP_FRAME_WIDTH)),
    int(cap.get(cv.CAP_PROP_FRAME_HEIGHT)),
)
out = cv.VideoWriter('output.mp4', fourcc, 30.0, frame_size)  # fps, size

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        out.write(frame)  # Save this frame
finally:
    # Important: release the writer even if the loop raises.
    out.release()
Contours & Shapes
Objects ke boundaries dhundhna aur analyze karna
findContours + drawContours
Binary image mein connected components ke outlines dhundhta hai. Object counting, shape detection ke liye essential hai.
# Step 1: build a binary image.
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
_, binary = cv.threshold(gray, 127, 255, cv.THRESH_BINARY)

# Step 2: find the contours.
contours, hierarchy = cv.findContours(
    binary,
    cv.RETR_EXTERNAL,
    cv.CHAIN_APPROX_SIMPLE,
)
print(f"Kitne objects mile: {len(contours)}")

# Step 3: draw and analyze, skipping small contours.
min_area = 500
for cnt in contours:
    if not cv.contourArea(cnt) > min_area:
        continue
    cv.drawContours(img, [cnt], -1, (0, 255, 0), 2)
    x, y, w, h = cv.boundingRect(cnt)
    cv.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 1)
Geometric Transforms
Rotate, flip, warp — image ki geometry badlo
Rotation & Flip
Image ko rotate ya flip karo. getRotationMatrix2D se custom angle pe rotate karo.
h, w = img.shape[:2]
center = (w // 2, h // 2)

# Rotate by 45 degrees about the center, with scale 1.0.
angle_deg, scale = 45, 1.0
M = cv.getRotationMatrix2D(center, angle_deg, scale)
rotated = cv.warpAffine(img, M, (w, h))

# Flip codes: 0 = vertical, 1 = horizontal, -1 = both.
flip_h = cv.flip(img, 1)
flip_v = cv.flip(img, 0)
Perspective Transform
Document scanning ke liye — tilted image ko seedha karo. 4 points select karo, transform karo.
import numpy as np

# Four source points, picked in the original image.
src = np.array(
    [[50, 50], [300, 30], [20, 280], [330, 290]],
    dtype=np.float32,
)

# Matching destination points: an upright 300x300 rectangle.
dst = np.array(
    [[0, 0], [300, 0], [0, 300], [300, 300]],
    dtype=np.float32,
)

M = cv.getPerspectiveTransform(src, dst)
output_size = (300, 300)
warped = cv.warpPerspective(img, M, output_size)
Real-World Techniques
Projects mein use hone wale complete workflows
👤 Face Detection (Easy)
Haar Cascade use karke faces detect karo — pre-trained model already OpenCV mein included hai. Webcam pe real-time bhi kaam karta hai.
# Load the Haar cascade model from OpenCV's bundled data directory.
cascade_path = cv.data.haarcascades + 'haarcascade_frontalface_default.xml'
face_cascade = cv.CascadeClassifier(cascade_path)

# Detection is run on the grayscale version of the image.
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(
    gray,
    scaleFactor=1.1,
    minNeighbors=5,
)

# Draw a box around every detection and report its top-left corner.
for (x, y, w, h) in faces:
    top_left, bottom_right = (x, y), (x + w, y + h)
    cv.rectangle(img, top_left, bottom_right, (255, 0, 0), 2)
    print(f"Face at x={x}, y={y}")

print(f"Total faces: {len(faces)}")
🎨 Color Object Detection (Medium)
HSV color space mein specific color ka mask banao, phir us color wale objects track karo. Live webcam pe colored ball track kar sakte ho.
import numpy as np

# Red color range in HSV. These bounds never change, so they are built once
# instead of on every frame.
# NOTE(review): this range only covers hues 0-10; red may also need a second
# range near the top of the hue scale - confirm against your target object.
lower = np.array([0, 120, 70])
upper = np.array([10, 255, 255])

cap = cv.VideoCapture(0)
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered; `frame` is not usable, so stop here
            # instead of passing it on to cvtColor.
            break

        hsv = cv.cvtColor(frame, cv.COLOR_BGR2HSV)
        mask = cv.inRange(hsv, lower, upper)

        # Apply the mask to the original frame.
        result = cv.bitwise_and(frame, frame, mask=mask)

        cv.imshow('Mask', mask)
        cv.imshow('Result', result)

        if cv.waitKey(1) == ord('q'):
            break
finally:
    # Always hand the camera back and close the windows, even on error.
    cap.release()
    cv.destroyAllWindows()
🏃 Motion Detection (Medium)
Do consecutive frames ka difference nikalo — jahan change ho wahan motion hai. Security camera jaise kaam karta hai.
cap = cv.VideoCapture(0)
prev_frame = None

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered; stop instead of processing it.
            break

        # Grayscale + heavy blur before differencing consecutive frames.
        gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
        gray = cv.GaussianBlur(gray, (21, 21), 0)

        if prev_frame is not None:
            # Pixels that changed by more than 25 between frames become 255.
            diff = cv.absdiff(prev_frame, gray)
            _, thresh = cv.threshold(diff, 25, 255, cv.THRESH_BINARY)
            if thresh.sum() > 10000:
                print("⚠️ Motion detected!")

        prev_frame = gray

        # waitKey only receives key presses while a HighGUI window exists,
        # so show the feed; otherwise 'q' could never end the loop.
        cv.imshow('Motion', frame)
        if cv.waitKey(30) == ord('q'):
            break
finally:
    # Always hand the camera back and close the window, even on error.
    cap.release()
    cv.destroyAllWindows()
📄 Document Scanner (Hard)
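Phone se document scan karo — perspective warp karke straight, clean image banao. Edges detect karo → corners nikalo → warp karo. Neeche ka scan_document() sirf document ke 4 corners return karta hai (ya None agar 4-corner contour nahi mila); warp ke liye in corners ko getPerspectiveTransform() aur warpPerspective() mein pass karo.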
import numpy as np


def scan_document(img):
    """Find a four-cornered outline among the largest contours of *img*.

    The image is converted to grayscale, blurred, and run through Canny
    edge detection. The five contours with the largest area are then
    approximated as polygons, largest first.

    Args:
        img: Image passed to ``cv.cvtColor`` with ``COLOR_BGR2GRAY``.

    Returns:
        The first polygon approximation with exactly four points, or
        ``None`` when none of the five candidates has four points.
    """
    grayscale = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    smoothed = cv.GaussianBlur(grayscale, (5, 5), 0)
    edge_map = cv.Canny(smoothed, 75, 200)

    found, _ = cv.findContours(
        edge_map,
        cv.RETR_LIST,
        cv.CHAIN_APPROX_SIMPLE,
    )
    # The largest contour is assumed to be the document; test the top five.
    candidates = sorted(found, key=cv.contourArea, reverse=True)[:5]

    for contour in candidates:
        perimeter = cv.arcLength(contour, True)
        # Tolerance is 2% of the perimeter.
        polygon = cv.approxPolyDP(contour, 0.02 * perimeter, True)
        if len(polygon) == 4:  # four corners found
            return polygon
    return None
Quick Reference — Cheat Sheet
Sabse common functions ek jagah
# Quick-reference snippets. Names such as y1, y2, x1, x2, x, y, cx, cy and r
# are placeholders to be supplied by the reader; this is not a runnable script.
import cv2 as cv
import numpy as np

# ── I/O ──────────────────────────────────────
img = cv.imread('img.jpg') # Read
gray = cv.imread('img.jpg', 0) # Grayscale
cv.imwrite('out.jpg', img) # Save
cv.imshow('win', img) # Display
cv.waitKey(0) # Wait
cv.destroyAllWindows() # Close

# ── Info ─────────────────────────────────────
h, w, c = img.shape # Height, Width, Channels
img.dtype # uint8

# ── Color ────────────────────────────────────
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
hsv = cv.cvtColor(img, cv.COLOR_BGR2HSV)
rgb = cv.cvtColor(img, cv.COLOR_BGR2RGB)

# ── Geometry ─────────────────────────────────
resized = cv.resize(img, (640,480))
crop = img[y1:y2, x1:x2]
flip = cv.flip(img, 1)

# ── Blur ─────────────────────────────────────
gauss = cv.GaussianBlur(img, (5,5), 0)
med = cv.medianBlur(img, 5)

# ── Threshold ────────────────────────────────
_, th = cv.threshold(gray, 127, 255, cv.THRESH_BINARY)

# ── Edges ────────────────────────────────────
edges = cv.Canny(gray, 100, 200)

# ── Draw ─────────────────────────────────────
cv.rectangle(img, (x,y), (x+w,y+h), (0,255,0), 2)
cv.circle(img, (cx,cy), r, (255,0,0), -1) # -1 = filled
cv.putText(img, 'text', (x,y), cv.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2)