Back to the main index
Part of the introductory series to using Python for Vision Research brought to you by the GestaltReVision group (KU Leuven, Belgium).
In this part we will capitalize on the basics you learned in Part 1 to build a real working experiment.
Authors: Bart Machilsen, Maarten Demeyer
Year: 2014
Copyright: Public Domain (CC0)
# Import all the necessary packages
import os, urllib2, json, io
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from matplotlib.colors import hsv_to_rgb
# Prepare directories
imagedir = os.path.join(os.getcwd(),'images')
if not os.path.isdir('results'):
    os.makedirs('results')
if not os.path.isdir(os.path.join(imagedir,'panoramio')):
    os.makedirs(os.path.join(imagedir,'panoramio'))
Fourier theorem (simplified): any signal can be expressed as a sum of sine waves. A Fourier transformation therefore decomposes a signal into a series of sine waves, each with its own amplitude, frequency and phase.
The Fast Fourier Transform (FFT) computes the discrete case of this: it decomposes equally spaced samples of the signal into an equal number of equally spaced discrete frequency components. This is a discrete approximation of the continuous Fourier transform.
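For reference, this discrete Fourier transform (a standard formulation, added here for clarity) maps $N$ equally spaced samples $x_0, \ldots, x_{N-1}$ onto $N$ complex coefficients:

$$X_k = \sum_{n=0}^{N-1} x_n \, e^{-2\pi i k n / N}, \qquad k = 0, \ldots, N-1,$$

where $|X_k|$ gives the amplitude and $\arg(X_k)$ the phase of the $k$-th frequency component.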
# Signal parameters: phase, frequency, amplitude, and number of samples
ph = np.pi/2
freq = 20
amp = 1
n_samples = 200
# Define 1-D signal
x = np.linspace(0,1,n_samples)
y = np.sin(ph + (2*np.pi*freq*x))*amp
# Plot 1-D signal
plt.figure(figsize=(10,1))
plt.plot(x,y,'b-')
plt.show()
You could imagine this wave as a sound wave; a single-frequency wave like this would be a pure tone. If this were a 'Do', then doubling the frequency would give a 'Do' one octave higher.
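To illustrate, here is a quick sketch (reusing the x, ph, freq and amp values defined above) that plots the same tone one octave up:
# The same tone, one octave higher: double the frequency
y_octave = np.sin(ph + (2*np.pi*(2*freq)*x))*amp
# Plot both tones: the octave has twice as many cycles
plt.figure(figsize=(10,1))
plt.plot(x,y,'b-')
plt.plot(x,y_octave,'r-')
plt.show()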
def one_d_fft(y):
    # Perform the actual FFT
    F = np.fft.fft(y)
    # The full spectrum F consists of complex numbers ordered by frequency
    # So, extract frequency, amplitude, phase information like this
    n_samples = len(y)
    fft_freq = np.fft.fftfreq(n_samples)*n_samples
    fft_amp = np.abs(F)/n_samples
    fft_ph = np.angle(F)
    # Plot the result
    plt.figure(figsize=(15,5))
    plt.subplot(1,2,1)
    plt.plot(fft_amp,'r.')
    xt = np.linspace(0,len(fft_freq), 5).astype(int)
    plt.xticks(xt, 1+fft_freq[xt-1].astype(int))
    plt.xlabel("Frequency",fontsize=20), plt.ylabel("Amplitude",fontsize=20)
    plt.subplot(1,2,2)
    plt.plot(fft_ph,'b.')
    plt.xticks(xt, 1+fft_freq[xt-1].astype(int))
    plt.xlabel("Frequency",fontsize=20), plt.ylabel("Phase",fontsize=20)
    plt.show()
    return fft_amp, fft_ph
fft_amp, fft_ph = one_d_fft(y)
Note the scale of the axes: the amplitude of 1 is split between the positive- and negative-frequency components, each showing 0.5.
The inverse Fourier transform reconstructs the original signal.
# Now do the inverse Fourier transform
i_F = fft_amp*n_samples * np.exp(1j*fft_ph)
i_y = np.fft.ifft(i_F).real
# Plot reconstructed 1-D signal
plt.figure(figsize=(10,1))
plt.hold(True)
plt.ylim([-1,1])
plt.plot(x,y,'b-')
plt.plot(x,i_y,'r-')
plt.hold(False)
plt.show()
More complex signals will show a more complex pattern of amplitudes and phases. For instance, this is the Fourier spectrum of a sum of three sine waves.
ph1 = np.pi/2; ph2 = 0; ph3 = 5*np.pi/4
freq1 = 9; freq2 = 20; freq3 = 35
amp1 = 1; amp2 = 1.5; amp3= 0.75
n_samples = 200
x = np.linspace(0,1,n_samples)
y1 = np.sin(ph1 + (2*np.pi*freq1*x))*amp1
y2 = np.sin(ph2 + (2*np.pi*freq2*x))*amp2
y3 = np.sin(ph3 + (2*np.pi*freq3*x))*amp3
ys = y1+y2+y3
plt.figure(figsize=(10,1))
plt.hold(True)
plt.plot(x,y1,'b-'); plt.plot(x,y2,'r-'); plt.plot(x,y3,'g-')
plt.plot(x,ys,'k-', linewidth=2)
plt.hold(False)
plt.show()
fft_amp, fft_ph = one_d_fft(ys)
Note that this is in essence how MP3 compression works: components that are less relevant (e.g. low in amplitude, or at frequencies we are less sensitive to) are removed from the Fourier transform to reduce the total amount of data that needs to be saved.
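As a toy illustration of that idea (a sketch only, far simpler than an actual audio codec), we can discard the weakest components from the spectrum of ys and reconstruct the signal. Here the threshold removes only the weakest of the three component sines, so the reconstruction stays close to the original:
# Toy 'compression': keep only the strongest Fourier components
F = np.fft.fft(ys)
F_compressed = F.copy()
F_compressed[np.abs(F) < 0.6*np.max(np.abs(F))] = 0
# Reconstruct and compare to the original signal
ys_compressed = np.fft.ifft(F_compressed).real
plt.figure(figsize=(10,1))
plt.plot(x,ys,'k-')
plt.plot(x,ys_compressed,'r-')
plt.show()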
Fourier transforms in two dimensions work similarly, but naturally return 2D spectra as well. In the case of images, each of these spectra has the same size as the image itself.
def do_fourier_transform(img):
    # Do the fft
    F = np.fft.fft2(img)
    # Center the spectrum on the lowest frequency
    F_centered = np.fft.fftshift(F)
    # Extract amplitude and phase
    A = np.abs(F_centered)
    P = np.angle(F_centered)
    # Return amplitude, phase, and the full spectrum
    return A, P, F
plt.figure(figsize=(15,5))
img = plt.imread(os.path.join(imagedir,'forest.jpg'))
plt.subplot(1,4,1), plt.imshow(img), plt.axis('off')
img = np.mean(img,axis=2)
plt.subplot(1,4,2), plt.imshow(img, cmap='gray'), plt.axis('off')
A, P, F = do_fourier_transform(img)
plt.subplot(1,4,3), plt.imshow(np.log(A), cmap='gray'), plt.axis('off')
plt.subplot(1,4,4), plt.imshow(P, cmap='gray'), plt.axis('off')
plt.show()
You may notice a few things: there is a cardinal bias (more energy in horizontal and vertical orientations), and the lower frequencies have higher amplitudes. This is a fairly general pattern, shared by many natural images. The real image information is in the more complex phase spectrum.
To illustrate the relation between frequency and amplitude more clearly, we will take a rotational average of the amplitude spectrum, collapsing across orientations.
def get_band_mask(spectrum, band, n_bands):
    """Select a circular frequency band from the spectrum"""
    # Get image coordinates, and center on 0
    x,y = np.meshgrid(range(spectrum.shape[1]),range(spectrum.shape[0]))
    x = x - np.max(x)/2
    y = y - np.max(y)/2
    # Compute distances from center
    radius = np.hypot(x,y)
    # Compute the min and max frequencies of this band
    bw = np.amin(spectrum.shape)/(n_bands*2)
    freqs = [0+bw*band,bw+bw*band]
    # Create the corresponding mask
    msk = np.zeros(spectrum.shape, dtype=bool)
    msk[(radius<freqs[1])*(radius>freqs[0])]=True
    # Do not include the zero-th frequency (overall luminance)
    msk[spectrum.shape[0]/2,spectrum.shape[1]/2] = False
    return msk
# An illustration of what this does
plt.figure(figsize=(15,5))
plt.subplot(1,7,1)
plt.imshow(np.log(A),cmap='gray')
plt.axis('off')
n_bands = 6
for band in np.arange(n_bands):
    msk = get_band_mask(A, band, n_bands)
    plt.subplot(1,n_bands+1,band+2)
    plt.imshow(msk*np.log(A),cmap='gray')
    plt.axis('off')
def get_rotational_average(A, n_bands):
    """Average over the contents of n frequency bands"""
    res = []
    for band in np.arange(n_bands):
        msk = get_band_mask(A, band, n_bands)
        res.append(np.mean(A[msk].flatten())/A.size)
    return np.array(res, dtype=float)
rotavg = get_rotational_average(A, 20)
plt.figure(figsize=(5,5))
plt.plot(range(1,len(rotavg)),rotavg[1:],'k-')
plt.xlabel('Frequency Band')
plt.ylabel('Mean Amplitude')
plt.show()
Thus: Fourier amplitudes fall off roughly as the inverse of the corresponding frequency, a power law rather than an exponential decay. It is said that natural images have a 1/f amplitude spectrum.
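To put a number on this, a quick sketch (reusing the rotavg computed above): fit a straight line to the log-log rotational average. A slope near -1 corresponds to a 1/f spectrum.
# Fit log(amplitude) as a linear function of log(frequency band)
bands = np.arange(1,len(rotavg))
slope, intercept = np.polyfit(np.log(bands), np.log(rotavg[1:]), 1)
print 'log-log slope:', slope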
Fourier analysis is easily extended to the low-pass, high-pass or bandpass filtering of images. For instance:
def do_inverse_fourier_transform(A, P):
    """Reconstruct the image from amplitude and phase spectrum"""
    F_centered = A * np.exp(1j*P)
    F = np.fft.ifftshift(F_centered)
    img = np.fft.ifft2(F).real
    return img
# Do the Fourier transform, copy the amplitude spectrum
A, P, F = do_fourier_transform(img)
Af = A.copy()
# Compute one frequency at each pixel
fx = np.fft.fftshift(np.fft.fftfreq(A.shape[0]))
fy = np.fft.fftshift(np.fft.fftfreq(A.shape[1]))
fx,fy = np.meshgrid(fy,fx)
freq = np.hypot(fx,fy)
# Filter and reconstruct
bandpass = (freq>0) * (freq<0.05)
Af[~bandpass] = 0
f_img = do_inverse_fourier_transform(Af, P)
# Show result
plt.figure(figsize=(10,10))
plt.imshow(f_img,cmap='gray')
plt.axis('off')
plt.show()
You might notice the ringing artefacts familiar from JPEG-compressed images, exaggerated here because of the simplified method. This is indeed the gist of how that compression algorithm works: higher-frequency components are removed (or heavily quantized).
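For comparison, the complementary high-pass filter is obtained by simply inverting the band (a sketch reusing A, P and bandpass from above):
# High-pass: keep only the frequencies the band-pass filter removed
Ah = A.copy()
Ah[bandpass] = 0
h_img = do_inverse_fourier_transform(Ah, P)
plt.figure(figsize=(10,10))
plt.imshow(h_img, cmap='gray')
plt.axis('off')
plt.show()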
Amplitude and frequency can be decorrelated by a process called whitening. Quite simply, we apply a uniform amplitude spectrum and retain the phase spectrum.
# Generate and apply a uniform amplitude spectrum
wA = np.ones(A.shape)
w_img = do_inverse_fourier_transform(wA, P)
plt.figure(figsize=(10,10))
plt.imshow(w_img, cmap='gray'), plt.axis('off')
plt.show()
Clearly, the high frequencies dominate our perception now, even though physically all frequencies are equally strong.
The same phenomenon can be seen when we generate white noise, i.e. pixel luminances drawn randomly from a uniform distribution, independently of one another. Such noise has a uniform amplitude spectrum, but perceptually the lower frequencies barely register.
# Create white noise
noise = np.random.rand(img.shape[0], img.shape[1])
# Do FFT and rotational average
An, Pn, Fn = do_fourier_transform(noise)
rotavg = get_rotational_average(An, 50)
# Generate plots
plt.figure(figsize=(15,5))
plt.subplot(1,2,1)
plt.imshow(noise, cmap='gray'), plt.axis('off')
plt.subplot(1,2,2)
plt.plot(range(1,len(rotavg)),rotavg[1:],'k-')
plt.xlabel('Frequency Band')
plt.ylabel('Mean Amplitude')
plt.show()
If we apply the more natural amplitude spectrum of the original image to this noise, we get a type of noise that is perceptually more balanced across frequencies. That is, its amplitude is inversely proportional to its frequency.
This is called 1/f noise.
def one_f(sh):
    """Generate a 1/f amplitude spectrum for a given image shape"""
    fx = np.fft.fftshift(np.fft.fftfreq(sh[0]))
    fy = np.fft.fftshift(np.fft.fftfreq(sh[1]))
    fx,fy = np.meshgrid(fy,fx)
    f = np.hypot(fx,fy)
    A = np.abs(1/f)
    A[f==0] = 0
    return A
# Combine the 1/f amplitude spectrum with the noise phase spectrum
Apn = one_f(A.shape)
pn_img = do_inverse_fourier_transform(Apn, Pn)
rotavg = get_rotational_average(Apn, 50)
# Generate plots
plt.figure(figsize=(25,5))
plt.subplot(1,3,1)
plt.imshow(pn_img, cmap='gray'), plt.axis('off')
plt.subplot(1,3,2)
plt.plot(range(1,len(rotavg)),rotavg[1:],'k-')
plt.xlabel('Frequency Band')
plt.ylabel('Mean Amplitude')
plt.subplot(1,3,3)
plt.plot(np.log(np.linspace(0,0.5,len(rotavg)))[1:],np.log(rotavg[1:]),'k-')
plt.xlabel('Log of Frequency Band')
plt.ylabel('Log of Mean Amplitude')
plt.axis('equal')
plt.show()
Perceptually, then, 1/f noise is the more natural type of noise.
There are many natural image datasets available, each with its own properties. However, for this course we'll take a different approach: fetching images from Panoramio (the photo-sharing service used in Google Earth).
def resize_and_crop(PIL_img, tarw, tarh):
    """Resize and crop image to target dimensions"""
    w, h = PIL_img.size
    tarr = min(float(w)/tarw,float(h)/tarh)
    tars = (np.array(PIL_img.size)/tarr).astype(int)
    PIL_img = PIL_img.resize(tars, Image.ANTIALIAS)
    w, h = PIL_img.size
    top = np.random.randint(0,h-tarh+1)
    left = np.random.randint(0,w-tarw+1)
    PIL_img = PIL_img.crop((left,top,left+tarw,top+tarh))
    return PIL_img
def get_panoramio_images(n, tarw, tarh, lat, lon, tol):
    """API call to Panoramio to obtain latest images near a given location"""
    if n>100:
        raise Exception('Can only fetch 100 images')
    total = 0
    frval = 0
    while total<n:
        base_str = ("http://www.panoramio.com/map/get_panoramas.php?"
                    "set=full&from="+str(frval)+"&to="+str(frval+n))
        options = {'size': 'original',
                   'minx': str(lon-tol),
                   'miny': str(lat-tol),
                   'maxx': str(lon+tol),
                   'maxy': str(lat+tol)}
        options_str = ""
        for k,v in options.iteritems():
            options_str = options_str + '&' + k + '=' + v
        api_call = base_str + options_str
        res = urllib2.urlopen(api_call).read()
        photolist = json.loads(res)['photos']
        if len(photolist)<n:
            raise Exception('Not enough images available')
        for photo in photolist:
            print str(photo['photo_id'])
            if (photo['width']>=tarw) and (photo['height']>=tarh):
                img = urllib2.urlopen(photo['photo_file_url']).read()
                img = io.BytesIO(img)
                img = Image.open(img)
                img = resize_and_crop(img, tarw, tarh) # resize and crop
                img = img.convert('L') # convert to grayscale
                img.save(os.path.join(imagedir, 'panoramio', str(photo['photo_id'])+'.png'))
                total = total + 1
                if total == n:
                    break
        frval = frval + n
n = 20
tarw = 512
tarh = 512
lat = 50.876226 # Latitude PSI
lon = 4.707348 # Longitude PSI
tol = 0.5 # Search area
get_panoramio_images(n, tarw, tarh, lat, lon, tol)
Let's compute the correlation in luminance between each pixel and its horizontal neighbour at a given distance to the right.
def compute_pixel_correlations(img):
    """Compare each pixel to its right neighbour X pixels away"""
    n_dists = 200
    dists = np.arange(n_dists)
    correlation = np.zeros((n_dists,1))
    columns = np.arange(img.shape[1])
    plt.figure(figsize=(15,5))
    for d in dists:
        pixels1 = img[:,columns[0:-(d+1)]].flatten()
        pixels2 = img[:,columns[d:-1]].flatten()
        correlation[d] = np.corrcoef(pixels1, pixels2)[0, 1]
        # Plot the first four as an illustration
        if d < 4:
            rselect = np.random.randint(0,pixels1.size,500)
            plt.subplot(1,4,d+1)
            plt.plot(pixels1[rselect],pixels2[rselect],'b.')
            plt.title('dist = ' + str(d) + ' pixels')
            plt.axis('scaled')
            plt.xlim([np.min(img.flatten()),np.max(img.flatten())])
            plt.ylim([np.min(img.flatten()),np.max(img.flatten())])
    # Then plot the correlations over the entire range
    plt.figure(figsize=(5,5))
    plt.plot(dists, correlation, 'b.', lw=3)
    plt.xlabel('Distance')
    plt.ylabel('Correlation')
    plt.xlim([0,n_dists])
    plt.ylim([0,1])
    plt.show()
img = plt.imread(os.path.join(imagedir,'oudemarkt.jpg'))
img = np.mean(img,axis=2)
plt.figure(figsize=(5,5))
plt.imshow(img, cmap='gray')
plt.show()
compute_pixel_correlations(img)
To further demonstrate the link between pixel correlations and the 1/f amplitude spectrum, consider what happens when we apply a uniform amplitude spectrum to the image, decorrelating amplitude and frequency: the pixel correlations also disappear.
A,P,F = do_fourier_transform(img)
wA = np.ones(A.shape)
w_img = do_inverse_fourier_transform(wA, P)
plt.figure(figsize=(5,5))
plt.imshow(w_img, cmap='gray')
plt.show()
compute_pixel_correlations(w_img)
These pixel correlations are quite naturally exploited by the visual system, in phenomena such as surface filling-in.
We can also exploit pixel correlations technologically. Bill Geisler has patented a simple method of making point estimates of missing pixels, using higher-order frequency distributions. Let's consider the basic case of dead pixels on a digital camera.
def train_missingpixel(traindir):
    """Create the correlational structure
    - 3D frequency distribution of luminances
    - Indexed as (neighbour-1, neighbour+1, central pixel)
    """
    train_array = np.zeros((256,256,256)).astype(int)
    traindir = os.path.join(imagedir, traindir)
    photolist = os.listdir(traindir)
    print len(photolist), 'images found'
    for idx,photo in enumerate(photolist):
        print idx
        img = plt.imread(os.path.join(traindir,photo))
        if img.ndim == 3:
            img = np.mean(img, 2)
        if np.max(img) <= 1:
            img = np.round(img*255).astype(int)
        # Do the counting, one horizontal triple of pixels at a time
        for row in np.arange(0,img.shape[0]):
            irw = img[row,:]
            for px in np.arange(len(irw)-2):
                train_array[(irw[px],irw[px+2],irw[px+1])] += 1
    # For each (left, right) pair, take the most frequent central value
    train_array = np.argmax(train_array, axis=2)
    # Save the result
    np.save(os.path.join('results','trained.npy'), train_array)
# Do the training!
train_missingpixel('panoramio')
# Illustrate the predicted central value for each pair of neighbours
train_array = np.load(os.path.join('results','trained.npy'))
plt.figure(figsize=(8,8))
plt.imshow(train_array, cmap='gray', interpolation='nearest')
plt.show()
# What can we infer from this?
# Read image
img = plt.imread(os.path.join(imagedir,'kitten.jpg'))
if img.ndim == 3:
img = np.mean(img,2)
if np.max(img) <= 1:
img = img*255
img = np.round(img).astype(int)
# Read trained correlational structure
train_array = np.load(os.path.join('results','trained.npy'))
# Introduce dead pixels
damaged_x = np.random.randint(0,img.shape[0],img.size/10)
damaged_y = np.random.randint(0,img.shape[1],img.size/10)
damaged_img = img.copy()
damaged_img[damaged_x,damaged_y] = -1
# Show the damage done
plt.imshow(damaged_img, cmap='gray', interpolation='nearest')
plt.show()
plt.imsave(os.path.join('results','damaged.png'),damaged_img, cmap='gray')
# SLOW and IMPERFECT, but transparent pixel fixing
idxx,idxy = np.where(damaged_img==-1)
repaired_img = damaged_img.copy()
for px in range(len(idxx)):
    # Don't deal with edge pixels
    if idxy[px] <= 0 or idxy[px] >= img.shape[1]-1:
        continue
    # Get the left and right neighbours (horizontal, as in training)
    leftpx = damaged_img[idxx[px],idxy[px]-1]
    rightpx = damaged_img[idxx[px],idxy[px]+1]
    # For clusters of dead pixels, move one more to the side
    i = 1
    while leftpx == -1 and idxy[px]-i > 0:
        i = i+1
        leftpx = damaged_img[idxx[px],idxy[px]-i]
    i = 1
    while rightpx == -1 and idxy[px]+i < img.shape[1]-1:
        i = i+1
        rightpx = damaged_img[idxx[px],idxy[px]+i]
    # Fill in the dead pixels
    if leftpx > -1 and rightpx > -1:
        repaired_img[idxx[px],idxy[px]] = train_array[leftpx,rightpx]
# Show the result
plt.imshow(repaired_img, cmap='gray', interpolation='nearest')
plt.show()
plt.imsave(os.path.join('results','repaired.png'),repaired_img, cmap='gray')
We will use oriented Gabors as the edge filters. For instance, let's create a simple horizontal filter.
def render_gabor(pars):
    """Render a single Gabor patch"""
    vals = np.linspace(-pars['hsize'], pars['hsize'], (2*pars['hsize'])+1)
    xgr, ygr = np.meshgrid(vals, vals)
    gaussian = np.exp(-(xgr**2+ygr**2)/(2*pars['sigma']**2))
    slant = (ygr*(2*np.pi*pars['freq']*np.cos(pars['or'])) +
             xgr*(2*np.pi*pars['freq']*np.sin(pars['or'])))
    grating = pars['amp']*np.cos(slant+pars['phase'])
    return gaussian*grating
# Define Gabor parameters
gabpars = {}
gabpars['freq'] = 0.1
gabpars['sigma'] = 2.5
gabpars['amp'] = 1.
gabpars['phase'] = np.pi/2
gabpars['hsize'] = 15.
gabpars['or'] = 0.
# Render the Gabor
gab = render_gabor(gabpars)
# Show it
plt.figure(figsize=(5,5))
plt.imshow(gab, cmap='gray', interpolation='nearest')
plt.show()
Now, let's read in the image to be filtered.
img = plt.imread(os.path.join(imagedir,'kitten.jpg'))
img = np.mean(img, 2)
plt.figure(figsize=(10,10))
plt.imshow(img, cmap='gray', interpolation='nearest')
plt.show()
To detect which parts of the image correspond best to the filter, we convolve the image with the filter; by the convolution theorem, this amounts to multiplying the two in the Fourier domain.
def filter_image(flt,img):
    """Filter an image with one filter"""
    # Preparation: pad the arrays to a power-of-two size
    hflt, vflt = flt.shape
    himg, vimg = img.shape
    hres = hflt+himg
    vres = vflt+vimg
    hres2 = 2**int(np.log(hres)/np.log(2.0) + 1.0)
    vres2 = 2**int(np.log(vres)/np.log(2.0) + 1.0)
    img = img[::-1,::-1]
    # !!!THE FILTERING!!!
    fftimage = (np.fft.fft2(flt, s=(hres2, vres2))*
                np.fft.fft2(img, s=(hres2, vres2)))
    res = np.fft.ifft2(fftimage).real
    # Cut the actual filtered image from the result
    res = res[(hflt/2):hres-(hflt/2)-1,(vflt/2):vres-(vflt/2)-1][::-1, ::-1]
    # Return it
    return res
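The trick this relies on is the convolution theorem: multiplying two zero-padded Fourier transforms is equivalent to a linear convolution in the spatial domain. A minimal standalone check of that principle (assuming SciPy is available; not part of the original tutorial code):
from scipy.signal import convolve2d

# Small random test case: zero-pad, multiply the FFTs, transform back
a = np.random.rand(8,8)
b = np.random.rand(3,3)
full_shape = (a.shape[0]+b.shape[0]-1, a.shape[1]+b.shape[1]-1)
conv_fft = np.fft.ifft2(np.fft.fft2(a, s=full_shape)*np.fft.fft2(b, s=full_shape)).real
conv_direct = convolve2d(a, b, mode='full')
print np.allclose(conv_fft, conv_direct)  # should print True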
filtered = filter_image(gab,img)
plt.figure(figsize=(10,10))
plt.imshow(filtered, cmap='gray', interpolation='nearest')
plt.show()
Clearly, the highest responses are on clear horizontal edges with the bright side on top; the lowest responses are on clear horizontal edges with the dark side on top. By changing the orientation of the filter, edges of other orientations will light up.
So, let's try a filterbank with 50 different orientations:
n = 50
ors = np.linspace(0,2*np.pi,n+1)[:-1]
res = np.zeros(img.shape+(n,))
for idx,this_or in enumerate(ors):
    # Render the filter
    gabpars['or'] = this_or
    gab = render_gabor(gabpars)
    # Filter the image
    filtered = filter_image(gab,img)
    # Save the result in a numpy array
    res[:,:,idx] = filtered
    # And as images
    plt.imsave(os.path.join('results','flt%02d.jpg'%idx), gab, cmap='gray')
    plt.imsave(os.path.join('results','fimg%02d.jpg'%idx), filtered, cmap='gray')
So, for each pixel we now have a response strength for each orientation. Conveniently, I've saved these results in a 3D numpy array. To find the estimated orientation of the (potential) edge at each pixel, all we need to do is take the maximum (and the index of that maximum) across the third dimension.
# Find out the maximal response strength
strongest_resp = np.amax(res, axis=2)
# Find out to which filter it corresponds
strongest_or = np.argmax(res, axis=2)
# Show each array
plt.figure(figsize=(10,10))
plt.imshow(strongest_resp, cmap='gray', interpolation='nearest')
plt.show()
plt.figure(figsize=(10,10))
plt.imshow(strongest_or, cmap='gray', interpolation='nearest')
plt.show()
# Now let's combine these into one image
# Using an HSV colorspace (hue-saturation-value)
H = strongest_or.astype(float) / (len(ors)-1)
S = np.ones_like(H)
V = (strongest_resp-np.min(strongest_resp)) / (np.max(strongest_resp)-np.min(strongest_resp))
HSV = np.dstack((H,S,V))
RGB = hsv_to_rgb(HSV)
# Render a hue circle as legend
sz = 100
x,y = np.meshgrid(range(sz),range(sz))
rad = np.hypot(x-sz/2,y-sz/2)
ang = 0.5+(np.arctan2(y-sz/2,x-sz/2)/(2*np.pi))
mask = (rad<sz/2)&(rad>sz/4)
hsv_legend = np.dstack((ang, np.ones_like(ang, dtype='float'), mask.astype('float')))
rgb_legend = hsv_to_rgb(hsv_legend)
RGB[:sz,:sz,:] = rgb_legend[::-1,::]
# Show result
plt.figure(figsize=(10,10))
plt.imshow(RGB, interpolation='nearest')
plt.show()
Better results still can be obtained by taking into account color contrasts as well, and by filtering the image at different spatial scales.
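A sketch of how the multi-scale part might look (hypothetical freq/sigma pairs, reusing render_gabor, filter_image, gabpars, ors and img from above):
# Hypothetical multi-scale filter bank: one set of orientations per spatial frequency
# (freq, sigma) pairs chosen so that sigma scales with the grating period
for this_freq, this_sigma in [(0.05, 5.0), (0.1, 2.5), (0.2, 1.25)]:
    gabpars['freq'] = this_freq
    gabpars['sigma'] = this_sigma
    for idx,this_or in enumerate(ors):
        gabpars['or'] = this_or
        filtered = filter_image(render_gabor(gabpars), img)
        # ...then combine the responses across scales and orientations, as above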
We can now summarize the distribution of edge orientations in this image. Weighted by response strength, these are the most frequent orientations.
# Compute the height of each bar
to_plot = np.array([np.sum(strongest_resp[strongest_or==bn]) for bn in range(len(ors))])
to_plot = to_plot/np.max(to_plot)
# Create the figure
fig = plt.figure(figsize=(5,5))
ax_pol = fig.add_axes([0,0,1,1], polar=True)
bin_bases = np.linspace(0, 2*np.pi, len(ors)+1)[:-1]
ax_pol.bar(bin_bases, to_plot, width=2*np.pi/len(ors), bottom=0.1, align='center')
plt.yticks([])
plt.show()
Again, we have a clear cardinal bias, especially in the horizontal direction.
Next, we can look at the relative orientations of edges. I will try to give you an intuitive insight into this by creating a frequency distribution of the relative orientations of all nearby edges, disregarding the exact distance or relative position.
First, let's threshold the image to retain only edges.
thresh = 95
thresh_val = np.percentile(strongest_resp, thresh)
plt.figure(figsize=(10,10))
plt.imshow(strongest_resp>thresh_val, cmap='gray', interpolation='nearest')
plt.show()
Next, we define 'nearby' as a square region of half-width 10 around each edge pixel, and start counting all the relative orientations within this little region.
nbh = 10
# Prepare result vector and coordinate arrays
to_plot = np.zeros(len(ors), dtype=int)
x,y = np.meshgrid(range(img.shape[1]),range(img.shape[0]))
# Threshold to get the edges
thresh_mask = (strongest_resp>thresh_val)
idxx,idxy = np.nonzero(thresh_mask)
# Again, a slow but transparent implementation
# Take one edge pixel at a time
dist_mask = np.zeros_like(img, dtype=bool)
for px in range(len(idxx)):
    if not px%2500:
        print px, len(idxx)
    # Determine which other edges to take into account
    # (clip the neighbourhood at the image borders)
    dist_mask.fill(False)
    dist_mask[max(0,idxx[px]-nbh):idxx[px]+nbh,max(0,idxy[px]-nbh):idxy[px]+nbh] = True
    dist_mask[idxx[px],idxy[px]] = False
    # Compute relative orientations for nearby other edges
    rel_ors = strongest_or[dist_mask&thresh_mask] - strongest_or[idxx[px],idxy[px]]
    rel_ors[rel_ors<0] = rel_ors[rel_ors<0] + len(ors)
    # Count into their bins, and add to the previous results
    to_plot = to_plot + np.array([np.sum(rel_ors==bn) for bn in range(len(ors))])
# Create the figure
fig = plt.figure(figsize=(5,5))
ax_pol = fig.add_axes([0,0,1,1], polar=True)
bin_bases = np.linspace(0, 2*np.pi, len(ors)+1)[:-1]
ax_pol.bar(bin_bases, to_plot.astype(float)/np.amax(to_plot), width=2*np.pi/len(ors), bottom=0.1, align='center')
plt.yticks([])
plt.show()
Clearly, most nearby edges have the same orientation and the same polarity, with a little bump in the frequency distribution for the opposite polarity of the same edge orientation as well.