import numpy as np
import matplotlib.pyplot as plt
Before we can warp our images into alignment, we need to recover the parameters of the transformation between each pair of images. In our case, the transformation is a homography: $p'=Hp$, where $H \in \mathbb{R}^{3\times3}$ is a matrix with 8 degrees of freedom (the lower-right entry is a scaling factor and can be set to 1). One way to recover the homography is from a set of $(p',p)$ pairs of corresponding points taken from the two images.
We collected 20 point correspondences and then constructed an overdetermined system of equations, solving for the 8 homography parameters with least squares.
For one pair of points, we can write the transform as $$p'=Hp$$ $$\begin{bmatrix}wx' \\ wy' \\ w\end{bmatrix} = \begin{bmatrix}a & b & c\\ d & e & f \\ g & h & 1\end{bmatrix}\begin{bmatrix}x \\ y \\ 1\end{bmatrix}$$
Expanding, we get the linear equations $$wx' = ax + by + c$$ $$wy' = dx + ey + f$$ $$w = gx + hy + 1$$
Substituting the equation for $w$ into the first two equations gives $$x' = ax+by+c-gxx'-hyx'$$ $$y' = dx+ey+f-gxy'-hyy'$$
We can rewrite this into a matrix vector form: $$\begin{bmatrix}x & y & 1 & 0 & 0 & 0 & -xx' & -yx'\\ 0 & 0 & 0 & x & y & 1 & -xy' & -yy'\end{bmatrix} \begin{bmatrix}a \\ b \\ c \\ d \\ e \\ f \\ g \\ h\end{bmatrix} = \begin{bmatrix} x' \\ y' \end{bmatrix}$$
With more points, we see that this form turns into the general $$Ah=b$$ form that can be solved with least squares to recover $h$.
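Concretely, with $n$ correspondences the stacked system is $$\begin{bmatrix} x_1 & y_1 & 1 & 0 & 0 & 0 & -x_1x_1' & -y_1x_1' \\ 0 & 0 & 0 & x_1 & y_1 & 1 & -x_1y_1' & -y_1y_1' \\ \vdots & & & & & & & \vdots \\ x_n & y_n & 1 & 0 & 0 & 0 & -x_nx_n' & -y_nx_n' \\ 0 & 0 & 0 & x_n & y_n & 1 & -x_ny_n' & -y_ny_n' \end{bmatrix} \begin{bmatrix}a \\ b \\ c \\ d \\ e \\ f \\ g \\ h\end{bmatrix} = \begin{bmatrix} x_1' \\ y_1' \\ \vdots \\ x_n' \\ y_n' \end{bmatrix},$$ which has $2n$ equations in 8 unknowns and is overdetermined once $n > 4$.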
Now that we can recover homographies, we need to warp images. Before we start warping for stitching, we can "rectify" an image: warp it so that a known planar surface appears as if viewed head-on. This is done by defining 4 correspondences on the image and fixing the corresponding target points to be a rectangle.
I chose a picture I took in the Palace of Versailles of an artwork that was too big to photograph from directly in front. The image below is the original with the hand-picked correspondences overlaid. The results of the rectification are shown below that: on the left is the full rectified image, and on the right the rectification is zoomed in on the artwork.
versailles = plt.imread("data/versailles.png")[...,:-1]  # keep RGB, drop the alpha channel
pts_versailles = np.load("out/versailles.npy")
plt.figure(figsize=(10,10))
plt.imshow(versailles)
plt.axis("off")
plt.scatter(*zip(*pts_versailles), color="orange")
plt.show()
def computeH(im1_pts, im2_pts):
    # Recover the homography mapping im1_pts to im2_pts via least squares.
    assert im1_pts.shape == im2_pts.shape
    # Each correspondence contributes two rows to the system Ah = b.
    A = np.zeros((im1_pts.shape[0]*2, 8))
    b = np.zeros((im1_pts.shape[0]*2, 1))
    for i in range(im1_pts.shape[0]):
        x, y = im1_pts[i]
        x_p, y_p = im2_pts[i]
        b[2*i] = x_p
        b[2*i+1] = y_p
        A[2*i] = np.array([x, y, 1, 0, 0, 0, -x*x_p, -y*x_p])
        A[2*i+1] = np.array([0, 0, 0, x, y, 1, -x*y_p, -y*y_p])
    # Solve the overdetermined system and fix the scale entry to 1.
    h_vec = np.linalg.lstsq(A, b, rcond=None)[0]
    h_vec = np.append(h_vec, 1)
    return h_vec.reshape(3, 3)
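As a quick sanity check (illustrative only, not part of the original pipeline), computeH should recover a known homography from noise-free synthetic correspondences:
# Sanity check: recover an arbitrary, hypothetical homography H_true
# from 20 synthetic point pairs (noise-free, so recovery should be exact).
H_true = np.array([[1.2, 0.1, 30.0],
                   [-0.05, 0.9, 10.0],
                   [1e-4, 2e-4, 1.0]])
pts = np.random.rand(20, 2) * 500                 # random source points
pts_homo = np.hstack([pts, np.ones((20, 1))])     # homogeneous coordinates
mapped_homo = H_true @ pts_homo.T
mapped = (mapped_homo[:2] / mapped_homo[2]).T     # divide out w
print(np.allclose(computeH(pts, mapped), H_true, atol=1e-6))  # expect True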
# Forward map (kept for reference) - bad: leaves holes in the output
# def warpImage(im, H):
# y_max, x_max = im.shape[:2]
# x_range = range(x_max)
# y_range = range(y_max)
# xx, yy = np.meshgrid(x_range, y_range)
# base_coord = np.vstack((xx.flatten(), yy.flatten(), np.ones(xx.flatten().shape)))
# trans_coord_homo = H.dot(base_coord)
# trans_coord = (trans_coord_homo[:2]/trans_coord_homo[-1])
# bbox_input = np.array([[0, 0, im.shape[1], im.shape[1]],
# [0, im.shape[0], 0, im.shape[0]],
# [1, 1, 1, 1]])
# bbox_output_homo = H.dot(bbox_input)
# bbox_out = bbox_output_homo[:2]/bbox_output_homo[-1]
# shift = -np.minimum(np.min(bbox_out, axis=1), np.zeros((2)))[:,None]
# bbox_out = (bbox_out+shift).astype(int)
# trans_coord = (trans_coord+shift).astype(int)
# base_coord = base_coord[:2].astype(int)
# new_im = np.zeros(tuple(np.max(bbox_out+1, axis=1))[::-1]+(3,))
# values = im[base_coord[1,:], base_coord[0,:]]
# new_im[trans_coord[1,:], trans_coord[0,:]] = values
# return new_im
def pad(im, size):
    # Zero-pad im so that row/col indices up to `size` are in bounds,
    # splitting the padding evenly between the two sides of each axis.
    y_pad = max(0, size[0]+1-im.shape[0])
    x_pad = max(0, size[1]+1-im.shape[1])
    padding = ((y_pad//2, y_pad//2+y_pad%2), (x_pad//2, x_pad//2+x_pad%2), (0, 0))
    return np.pad(im, padding)
# Inverse warp - good (no holes)
def warpImage(im, H):
    # Forward-map the corners of im to find the output bounding box.
    bbox_input = np.array([[0, 0, im.shape[1]-1, im.shape[1]-1],
                           [0, im.shape[0]-1, 0, im.shape[0]-1],
                           [1, 1, 1, 1]])
    bbox_output_homo = H.dot(bbox_input)
    bbox_out = bbox_output_homo[:2]/bbox_output_homo[-1]
    # Shift so all output coordinates are non-negative.
    shift = (-np.minimum(np.min(bbox_out, axis=1), np.zeros((2)))[:,None]).astype(int)
    shift_homo = np.append(shift, 0)[:,None]
    shifted_bbox_out = (bbox_out+shift).astype(int)
    max_x, max_y = np.max(shifted_bbox_out, axis=1)
    new_im = np.zeros((max_y+1, max_x+1, 3))
    # Inverse-map every output pixel back into the source image.
    xx, yy = np.meshgrid(range(new_im.shape[1]), range(new_im.shape[0]))
    base_coord = np.vstack((xx.flatten(), yy.flatten(), np.ones(xx.flatten().shape)))
    trans_coord_homo = np.linalg.inv(H).dot(base_coord-shift_homo)
    trans_coord = trans_coord_homo[:2]/trans_coord_homo[-1]
    # Shift and pad the source so every sampled coordinate is in bounds.
    shift = -np.minimum(np.min(trans_coord, axis=1), np.zeros((2)))[:,None]
    trans_coord = (trans_coord+shift).astype(int)
    base_coord = (base_coord[:2]).astype(int)
    bounds = np.max(trans_coord, axis=1)[::-1]
    im = pad(im, bounds)
    # Nearest-neighbor sampling at the inverse-mapped locations.
    values = im[trans_coord[1,:], trans_coord[0,:]]
    new_im[base_coord[1,:], base_coord[0,:]] = values
    return new_im, shift_homo[:-1,0]
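Note that casting trans_coord to int amounts to nearest-neighbor sampling. For smoother resampling, one could swap in bilinear interpolation; below is a minimal sketch (an alternative, not used for the results in this report) using scipy.ndimage.map_coordinates with the same (x, y) trans_coord layout as above:
from scipy.ndimage import map_coordinates

# Hypothetical bilinear alternative to the integer-index lookup above.
def bilinear_sample(im, trans_coord):
    # trans_coord is (2, N) in (x, y) order; map_coordinates expects (row, col).
    coords = np.vstack((trans_coord[1], trans_coord[0]))
    # order=1 gives bilinear interpolation; sample each channel separately.
    channels = [map_coordinates(im[..., c], coords, order=1, mode="nearest")
                for c in range(im.shape[-1])]
    return np.stack(channels, axis=-1)   # (N, 3) sampled colors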
y_lim, x_lim = versailles.shape[:2]
square = np.array([[0, y_lim],
[0, 0],
[x_lim, 0],
[x_lim, y_lim]])
H = computeH(pts_versailles, square)
# H_inv = computeH(square, pts_versailles)
res, shift = warpImage(versailles, H)
plt.figure(figsize=(20, 20))
plt.subplot(121)
plt.imshow(res)
plt.axis("off")
plt.subplot(122)
plt.imshow(res[shift[1]:shift[1]+y_lim, shift[0]:shift[0]+x_lim])
plt.axis("off")
plt.show()
# plt.imsave("out/versailles_inv.png", res)
# plt.imsave("out/versailles_zoomed.png", res[shift[1]:shift[1]+y_lim, shift[0]:shift[0]+x_lim])
For stitching two images together, we use the homography to warp the left image into the frame of reference of the right image. We again use an inverse warp to avoid "holes" in the result. The warping (in this part) is done using hand-picked correspondences. Below, you can see the images that will be stitched, the pre-stitch warp, and the resulting stitched image.
alley_left = plt.imread("data/alley_1.jpg")[::-1, ::-1]   # [::-1, ::-1] flips both axes (180-degree rotation)
alley_right = plt.imread("data/alley_2.jpg")[::-1, ::-1]
plt.figure(figsize=(20, 20))
plt.subplot(121)
plt.title("Alley Left")
plt.imshow(alley_left)
plt.axis("off")
plt.subplot(122)
plt.title("Alley Right")
plt.imshow(alley_right)
plt.axis("off")
plt.show()
pts_1_2 = np.load("out/alley_1_2.npy")
pts_2_1 = np.load("out/alley_2_1.npy")
H1_2 = computeH(pts_1_2, pts_2_1)
left_warped, offset = warpImage(alley_left, H1_2)
out_shapes = np.vstack((left_warped.shape[:2], alley_right.shape[:2]))
max_shapes = np.max(out_shapes, axis=0)
plt.figure(figsize=(20, 20))
plt.subplot(121)
plt.title("Alley Left (warped)")
plt.imshow(pad(left_warped/255, max_shapes))
plt.axis("off")
plt.subplot(122)
plt.title("Alley Right (padded)")
plt.imshow(pad(alley_right, max_shapes))
plt.axis("off")
plt.show()
def stitch_ims(im1_warped, im2, H, offsets, im1_pts, im2_pts, ff=100):
    # Use one correspondence pair to register the two images on a shared canvas.
    idx = np.random.choice(range(im1_pts.shape[0]))
    align_point_homo = H.dot(np.array((im1_pts[idx][0], im1_pts[idx][1], 1)))
    align_point = align_point_homo[:2]/align_point_homo[-1]
    new_point = np.array([align_point[0] + offsets[0], align_point[1] + offsets[1]]).astype(int)
    old_point = np.array(im2_pts[idx]).astype(int)
    canvas_width = im2.shape[1] + new_point[0] - old_point[0]
    canvas_height = max(new_point[1], old_point[1]) + max(im2.shape[0]-old_point[1], im1_warped.shape[0]-new_point[1])
    canvas_left = np.zeros((canvas_height, canvas_width, 3))
    canvas_right = np.zeros((canvas_height, canvas_width, 3))
    # Place each image on its own canvas; ff is a manual vertical fudge factor.
    if new_point[1] < old_point[1]:  # Warped image sits higher than the right image
        height_bounds = slice(old_point[1]-new_point[1]+ff, im1_warped.shape[0]+old_point[1]-new_point[1]+ff)
        width_bounds = slice(new_point[0]-old_point[0], new_point[0]-old_point[0]+im2.shape[1])
        canvas_left[height_bounds, :im1_warped.shape[1], :] = im1_warped
        canvas_right[:im2.shape[0], width_bounds, :] = im2
    else:
        height_bounds = slice(new_point[1]-old_point[1]+ff, im2.shape[0]+new_point[1]-old_point[1]+ff)
        width_bounds = slice(new_point[0]-old_point[0], new_point[0]-old_point[0]+im2.shape[1])
        canvas_left[:im1_warped.shape[0], :im1_warped.shape[1], :] = im1_warped
        canvas_right[height_bounds, width_bounds, :] = im2
    # Feather the seam: a linear 1 -> 0 ramp across (at most 250px of) the
    # overlap between the warped left image and the right image.
    right_line = im1_warped.shape[1]
    left_line = new_point[0]-old_point[0]
    overlap_width = right_line-left_line
    c = overlap_width//2 + left_line
    overlap_width = min(250, overlap_width)
    pad_l = c - overlap_width//2
    pad_r = canvas_width - (c + overlap_width//2)
    mask = np.linspace(1, 0, overlap_width)
    mask = np.pad(mask, (pad_l, pad_r), mode="edge")
    mask = np.tile(mask, (canvas_height, 1))
    mask = np.repeat(mask[..., None], 3, axis=-1)
    return mask*canvas_left + (1-mask)*canvas_right
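To visualize the feathering mask on its own, here is a standalone sketch with made-up dimensions (a 100-pixel overlap centered at x=250 on a 500-pixel-wide canvas):
# Illustrative only: 1 -> 0 linear ramp over the overlap, held constant
# ("edge" padding) on either side, then tiled to the canvas height.
demo_w, demo_h, overlap = 500, 5, 100
ramp = np.linspace(1, 0, overlap)
ramp = np.pad(ramp, (200, demo_w - 200 - overlap), mode="edge")
demo_mask = np.tile(ramp, (demo_h, 1))
print(demo_mask[0, :3], demo_mask[0, -3:])  # ones on the left, zeros on the right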
stitch = stitch_ims(left_warped, alley_right, H1_2, offset, pts_1_2, pts_2_1, ff=100)
plt.figure(figsize=(20, 20))
plt.title("Stitched Image")
plt.imshow(stitch/255)
# plt.axis("off")
plt.show()
The next part of the project is automatically finding the correspondences for homography computation. We follow the steps described in "Multi-Image Matching using Multi-Scale Oriented Patches" by Brown et al.; each step is described below.
The first step in finding correspondences between the two images is finding good candidate points based on recognizable features of an image. We start with corners, found using a Harris corner detector. While this is a good starting point, as you can see in the image on the left below, it returns far too many points to work with. So, in order to narrow down the potential corners, we implement Adaptive Non-Maximal Suppression (ANMS).
ANMS works as follows: for each interest point, we find the minimum distance to any interest point with sufficiently larger corner strength. After we've computed this minimum radius for each point, we sort the list of interest points by descending radius and take the top 500.
The minimum radius for each point is found using the following formula:
$$r_i = \min_j \|x_i-x_j\|, \quad \text{s.t. } f(x_i) < c_{\text{robust}}\cdot f(x_j),\; x_j\in\mathcal{I}$$ where $x_i$ is a 2D interest point image location, $\mathcal{I}$ is the set of all interest point locations, $c_{\text{robust}}=0.9$ is a hyper-parameter, and $f$ is the corner strength returned by the Harris corner detector.
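To make the formula concrete, here is a toy example with three hypothetical interest points (values invented for illustration):
# Toy ANMS radii: three hypothetical points with strengths f = [10, 6, 9].
toy_pts = np.array([[0, 0], [0, 3], [4, 0]])   # (x, y) locations
toy_f = np.array([10.0, 6.0, 9.0])
c_robust = 0.9
for i in range(3):
    dists = np.linalg.norm(toy_pts - toy_pts[i], axis=1)
    suppressors = toy_f[i] < c_robust * toy_f   # points strong enough to suppress i
    r_i = dists[suppressors].min() if suppressors.any() else np.inf
    print("point {}: r = {}".format(i, r_i))
# Points 0 and 2 are never suppressed (r = inf); point 1 gets r = 3.0,
# its distance to the stronger point 0.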
The result of ANMS on the Harris Corner points is shown below, on the right.
For each point retained by ANMS, we simply take a 40x40 patch surrounding the point and downsample it by a factor of 5, giving an 8x8 descriptor patch per interest point. Each patch is normalized to have $\mu=0$ and $\sigma=1$. (Unlike the full MOPS descriptor, these patches are axis-aligned, so there is no rotation invariance.)
import numpy as np
import matplotlib.pyplot as plt
import harris
import skimage as sk
alley_left = plt.imread("data/alley_1.jpg")[::-1, ::-1]
alley_right = plt.imread("data/alley_2.jpg")[::-1, ::-1]
def anms(f_hm, coords, c_robust=0.9, n_ip=500):
    # For each interest point, compute its suppression radius: the distance
    # to the nearest point with sufficiently larger corner strength.
    rs = np.zeros((coords.shape[1], 2))
    dists = harris.dist2(coords.T, coords.T)
    f_vals = f_hm[coords[0], coords[1]]
    for i in range(coords.shape[1]):
        y, x = coords[:, i]
        cond = f_hm[y, x] < c_robust*f_vals    # points that suppress point i
        res = np.where(cond, dists[i, :], 1e10)
        min_idx = np.argmin(res)
        # Use res (not dists) so a point with no suppressor keeps an
        # effectively infinite radius instead of a spurious small one.
        min_val = res[min_idx]
        rs[i] = (i, min_val)
    # Keep the n_ip points with the largest radii.
    sorted_r_idx = np.argsort(rs[:, 1])[::-1]
    sorted_coord_idx = rs[sorted_r_idx, 0].astype(int)
    return coords[:, sorted_coord_idx][:, :n_ip]
def normalize(patches):
    # Normalize each flattened patch to zero mean and unit variance.
    og_shape = patches.shape
    reshaped = patches.reshape(og_shape[0], -1)
    mean = np.mean(reshaped, axis=1)[:, None]
    std = np.std(reshaped, axis=1)[:, None]
    normalized = (reshaped-mean)/std
    assert (np.isclose(np.mean(normalized, axis=1), 0)).all()
    assert (np.isclose(np.std(normalized, axis=1), 1)).all()
    return normalized
def extract_features(im, verbose=False):
    # Work at 1/4 resolution to speed up detection and matching.
    im_ds = sk.transform.resize(im, (im.shape[0] // 4, im.shape[1] // 4))
    im_ds_bw = np.mean(im_ds, axis=-1)
    f_hm, coords = harris.get_harris_corners(im_ds_bw)
    sup_coords = anms(f_hm, coords)
    # 40x40 patches around each corner, subsampled by 5 into 8x8 descriptors.
    patches = np.stack([im_ds_bw[sc[0]-20:sc[0]+20:5, sc[1]-20:sc[1]+20:5] for sc in sup_coords.T])
    normalized_patches = normalize(patches)
    return (normalized_patches, sup_coords, im_ds) if not verbose else (normalized_patches, sup_coords, coords, im_ds)
patches_left, sup_coord_left, left_ds = extract_features(alley_left)
patches_right, sup_coord_right, coord_right, right_ds = extract_features(alley_right, verbose=True)
plt.figure(figsize=(20, 20))
plt.subplot(121)
plt.title("Harris Corners on Right Image")
plt.imshow(right_ds)
plt.scatter(coord_right[1], coord_right[0], s=1)
plt.axis("off")
plt.subplot(122)
plt.title("ANMS Corners on Right Image")
plt.imshow(right_ds)
plt.scatter(sup_coord_right[1], sup_coord_right[0])
plt.axis("off")
plt.show()
Now, with our 8x8 feature patches extracted via ANMS, we need to match potential patches between images. This is done by finding patches that have the smallest L2 distance between them. As an intelligent thresholding metric, rather than thresholding the error of the closest match ($e_{1-NN}$), we threshold the ratio between the best match and the second-best match, $e_{1-NN}/e_{2-NN}$. The idea is that a reliable match should be much better than its runner-up: for example, a best-match error of 0.2 against a second-best of 0.8 gives a ratio of 0.25, which passes a threshold of 0.4, while an ambiguous pair with errors 0.5 and 0.6 does not.
The result of this feature matching on the two alley images is shown below.
def find_matches(patches_1, coords_1, patches_2, coords_2, t=0.4):
    # Pairwise L2 distances between all descriptor pairs.
    patch_dist = harris.dist2(patches_1, patches_2)
    N = patches_1.shape[0]
    min_idxs = np.argmin(patch_dist, axis=1)
    min_vals = np.min(patch_dist, axis=1)
    second_min_vals = np.sort(patch_dist, axis=1)[:, 1]
    # Lowe-style ratio test: keep matches whose best match is much
    # better than their second-best match.
    error_ratio = min_vals/second_min_vals
    match_idx = error_ratio < t
    matches_1 = np.arange(N)[match_idx]
    matches_2 = min_idxs[match_idx]
    pts_1 = coords_1[:, matches_1]
    pts_2 = coords_2[:, matches_2]
    return pts_1, pts_2
matches_left, matches_right = find_matches(patches_left, sup_coord_left, patches_right, sup_coord_right)
plt.figure(figsize=(20,20))
plt.subplot(121)
plt.title("Matched Features in Left Image")
plt.imshow(left_ds)
plt.scatter(matches_left[1], matches_left[0], c="red")
plt.axis("off")
plt.subplot(122)
plt.title("Matched Features in Right Image")
plt.imshow(right_ds)
plt.scatter(matches_right[1], matches_right[0], c="red")
plt.axis("off")
plt.show()
Now that we have a set of potential matches, we can compute our homography. However, there might still be some incorrect matches in our feature pairings, so in order to compute the homography robustly we use RANSAC. The idea behind RANSAC is that, at each loop, we:
1. Select four feature pairs at random.
2. Compute the exact homography $H$ from those four pairs.
3. Count the inliers: the pairs for which $\text{dist}(p', Hp) < \epsilon$.

We repeat this procedure many times and keep the largest set of inliers. Once we have this largest set, we recompute a least-squares estimate of $H$ on all of the inliers, and this is the homography we use to warp our image for stitching.
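As a side note on the loop count (standard RANSAC analysis, not from the original writeup): if a fraction $w$ of the candidate matches are true inliers, a random 4-pair sample is all-inlier with probability $w^4$, so $k$ iterations produce at least one clean sample with probability $1-(1-w^4)^k$. Even at $w=0.3$ this exceeds 99% after roughly 570 iterations, so the 50,000-iteration budget below (with an early exit at 20 inliers) is very conservative.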
The result of stitching using this automated method is shown below on the right, and on the left is the results using the manual correspondence from Part A.
def ransac(matches_1, matches_2, n_ransac_loops=50000, eps=0.5):
    # Coordinates come in as (row, col); flip to (x, y) for computeH.
    matches_1_flipped = np.flip(matches_1.T, axis=1)
    matches_2_flipped = np.flip(matches_2.T, axis=1)
    n_samples = matches_1_flipped.shape[0]
    largest_set_size = 0
    largest_set = None
    for i in range(n_ransac_loops):
        # Select four feature pairs at random
        idxs = np.random.choice(np.arange(n_samples), 4, replace=False)
        samp1 = matches_1_flipped[idxs, :]
        samp2 = matches_2_flipped[idxs, :]
        # Compute homography from the sample
        H = computeH(samp1, samp2)
        # Count inliers where dist(p', Hp) < eps
        p = np.vstack((matches_1_flipped.T, np.ones(n_samples)))
        Hp_homo = H.dot(p)
        Hp = (Hp_homo[:2]/Hp_homo[-1]).T
        err = np.linalg.norm(matches_2_flipped-Hp, axis=1)
        curr_set = err < eps
        # Keep largest set of inliers; stop early once it is big enough
        if np.sum(curr_set) > largest_set_size:
            largest_set = curr_set
            largest_set_size = np.sum(curr_set)
        if largest_set_size >= 20:
            break
    print("Using RANSAC for {} iterations found {} inlier correspondences".format(i+1, largest_set_size))
    # Recompute the least-squares H estimate on all inliers
    final_samp1 = matches_1_flipped[largest_set]
    final_samp2 = matches_2_flipped[largest_set]
    H_ransac = computeH(final_samp1, final_samp2)
    return H_ransac, final_samp1, final_samp2
H_ransac, auto_pts_1_2, auto_pts_2_1 = ransac(matches_left, matches_right)
H_manual = computeH(pts_1_2/4, pts_2_1/4)  # /4: the manual points were picked on full-resolution images
print("Manual Homography matrix:\n{}\nRANSAC Automatic Homography matrix:\n{}".format(H_manual, H_ransac))
manual_warp, offset = warpImage(left_ds, H_manual)
ransac_warp, offset_auto = warpImage(left_ds, H_ransac)
manual_stitch = stitch_ims(manual_warp, right_ds, H_manual, offset, pts_1_2/4, pts_2_1/4, ff=25)
auto_stitch_im = stitch_ims(ransac_warp, right_ds, H_ransac, offset_auto, auto_pts_1_2, auto_pts_2_1, ff=25)  # renamed to avoid shadowing the auto_stitch() function below
plt.figure(figsize=(20,20))
plt.subplot(121)
plt.title("Stitch with Manual Correspondences")
plt.imshow(manual_stitch)
plt.axis("off")
plt.subplot(122)
plt.title("Stitch with Automatic Correspondence Selection")
plt.imshow(auto_stitch_im)
plt.axis("off")
plt.show()
Here are two other results of stitching. The selection of images is unfortunately boring due to the COVID-19 quarantine, but I tried to get a little variety in the images.
def auto_stitch(im_left, im_right, tuning=False, ff=10):
    # Full automatic pipeline: features -> matches -> RANSAC -> warp -> stitch.
patches_left, sup_coord_left, left_ds = extract_features(im_left)
patches_right, sup_coord_right, right_ds = extract_features(im_right)
matches_left, matches_right = find_matches(patches_left, sup_coord_left, patches_right, sup_coord_right)
H_ransac, auto_pts_1_2, auto_pts_2_1 = ransac(matches_left, matches_right)
ransac_warp, offset_auto = warpImage(left_ds, H_ransac)
stitched_im = stitch_ims(ransac_warp, right_ds, H_ransac, offset_auto, auto_pts_1_2, auto_pts_2_1, ff=ff)
return (ransac_warp, right_ds, H_ransac, offset_auto, auto_pts_1_2, auto_pts_2_1, ff) if tuning else stitched_im
room_left = plt.imread("data/room_left.jpg")[::-1, ::-1]
room_right = plt.imread("data/room_right.jpg")[::-1, ::-1]
stitched_room = auto_stitch(room_left, room_right, ff=-12)
# ransac_warp, right_ds, H_ransac, offset_auto, auto_pts_1_2, auto_pts_2_1, _ = auto_stitch(room_left, room_right, tuning=True)
# stitched_room = stitch_ims(ransac_warp, right_ds, H_ransac, offset_auto, auto_pts_1_2, auto_pts_2_1, ff=-12)
plt.figure(figsize=(20,20))
plt.subplot(121)
plt.title("Room Left")
plt.imshow(room_left)
plt.axis("off")
plt.subplot(122)
plt.title("Room Right")
plt.imshow(room_right)
plt.axis("off")
plt.figure(figsize=(20,20))
plt.title("Stitched Room")
plt.imshow(stitched_room)
plt.axis("off")
plt.show()
kitchen_left = plt.imread("data/kitchen_left.jpg")[::-1, ::-1]
kitchen_right = plt.imread("data/kitchen_right.jpg")[::-1, ::-1]
stitched_kitchen = auto_stitch(kitchen_left, kitchen_right, ff=50)
# ransac_warp, right_ds, H_ransac, offset_auto, auto_pts_1_2, auto_pts_2_1, _ = auto_stitch(kitchen_left, kitchen_right, tuning=True)
# stitched_kitchen = stitch_ims(ransac_warp, right_ds, H_ransac, offset_auto, auto_pts_1_2, auto_pts_2_1, ff=50)
plt.figure(figsize=(20,20))
plt.subplot(121)
plt.title("Kitchen Left")
plt.imshow(kitchen_left)
plt.axis("off")
plt.subplot(122)
plt.title("Kitchen Right")
plt.imshow(kitchen_right)
plt.axis("off")
plt.figure(figsize=(20,20))
plt.title("Stitched Kitchen")
plt.imshow(stitched_kitchen)
plt.axis("off")
plt.show()