Last time I noticed that it takes O (number of areas) to find out which area a certain point belongs to, so I implemented it again. did
This time I tried to divide it while maintaining the tree structure properly
This way you can use O (tree depth) to find out which region a data point belongs to.
Or rather this is normal
Since there is a possibility that division will be repeated infinitely if there are multiple identical data points, the maximum number of divisions (maxdivision
) is given.
quadtree.py
# -*- coding: utf-8 -*-
class Quadtree:
def __init__(self,data,x1,y1,x2,y2,maxpoints,maxdivision):
self.data = data
self.sequential_id = 0
self.leaves = {}
self.root = self.divide(self.init_area(data,x1,y1,x2,y2),maxpoints,maxdivision)
def __iter__(self):
for aid in self.leaves:
yield self.leaves[aid]
def init_area(self,data,x1,y1,x2,y2):
initial = Area(x1,y1,x2,y2)
for d in data:
initial.append(d)
return initial
def subdivide(self,area):
division = []
"""Length of each side after division"""
xl = (area.x2 - area.x1)/2
yl = (area.y2 - area.y1)/2
"""Generate area after division"""
for dx in [0,1]:
for dy in [0,1]:
"""
0 2
1 3
Order
"""
sub_area = Area(area.x1+dx*xl, area.y1+dy*yl, area.x1+(1+dx)*xl, area.y1+(1+dy)*yl)
division.append(sub_area)
"""Assign data points belonging to the area before division to the area after division"""
for p in area.points():
for sub_area in division:
if sub_area.cover(p):
sub_area.append(p)
break
return division
def divide(self, area, maxpoints, division_left):
"""Division given as an argument_Repeat split only left times"""
if division_left == 0 or area.number_of_points() <= maxpoints:
"""If the number of data points included in the area does not exceed maxpoints, the division ends."""
area.set_fixed(self.sequential_id)
area.calc_center() #Calculate centroid of data points belonging to area
self.leaves[self.sequential_id] = area
self.sequential_id += 1
return area
else:
"""If the number of data points included in the area exceeds maxpoints, divide into four"""
next_level = self.subdivide(area)
"""Divide each child area further"""
for i in range(4):
child = self.divide(next_level[i],maxpoints,division_left-1)
area.set_child(i,child)
"""Returns the split area"""
return area
def get_area_id(self,p):
return self.root.covered(p)
class Area:
def __init__(self,x1,y1,x2,y2):
self.x1 = x1
self.y1 = y1
self.x2 = x2
self.y2 = y2
self.points_ = []
self.fixed = False
"""
0 2
1 3
"""
self.children = [None,None,None,None]
def calc_center(self):
if self.number_of_points() == 0:
self.center = (self.x1 + (self.x2-self.x1)/2, self.y1 + (self.y2-self.y1)/2)
else:
sumx = 0.
sumy = 0.
for p in self.points_:
sumx += p[0]
sumy += p[1]
self.center = (sumx/len(self.points_), sumy/len(self.points_))
def append(self, p):
"""Add data points to the area"""
self.points_.append(p)
def points(self):
"""Returns the data points that belong to the region"""
return self.points_
def number_of_points(self):
return len(self.points_)
def is_fixed(self):
"""Whether the split is over"""
return self.fixed
def set_fixed(self,aid):
"""Flag the split is complete"""
self.fixed = True
"""Give an ID to the area"""
self.aid = aid
def set_child(self,n,area):
self.children[n] = area
def covered(self,p):
if self.cover(p):
if self.fixed:
return self.aid
else:
"""Calculate the ID of the child area"""
cid = 0
if self.x1 + (self.x2 - self.x1) / 2 < p[0]:
""" Right """
cid += 2
if self.y1 + (self.y2 - self.y1) / 2 < p[1]:
""" Down """
cid += 1
return self.children[cid].covered(p)
else:
return None
def cover(self, p):
"""Whether a data point p is covered in this area"""
if self.x1 < p[0] and self.y1 < p[1] and self.x2 >= p[0] and self.y2 >= p[1]:
return True
else:
return False
if __name__ == '__main__':
import sys
def read_data(file_name):
data = []
for line in open(file_name, 'r'):
entries = line.rstrip().split(' ')
lat = float(entries[0])
lng = float(entries[1])
data.append((lat,lng))
return data
x1 = float(sys.argv[1])
y1 = float(sys.argv[2])
x2 = float(sys.argv[3])
y2 = float(sys.argv[4])
maxpoints = int(sys.argv[5])
maxdivision = int(sys.argv[6])
data = read_data(sys.argv[7])
"""Split"""
qtree = Quadtree(data,x1,y1,x2,y2,maxpoints,maxdivision)
"""result"""
for a in qtree:
print a.aid,a.x1,a.y1,a.x2,a.y2,a.center
p = (0.37,0.55)
print p,qtree.get_area_id(p)
The arguments are given in the order of x, y in the upper left coordinates of the target area, x, y in the lower right coordinates, the maximum number of data that can belong to the area, the maximum number of divisions, and the data file.
$ cat data
0.567603099626 0.410160220857
0.405568042222 0.555583016695
0.450289054963 0.252870772505
0.373657009068 0.549501477427
0.500192599714 0.352420542886
0.626796922 0.422685113179
0.527521290061 0.483502904656
0.407737520746 0.570049935936
0.578095278433 0.6959689898
0.271957975882 0.450460115198
0.56451369371 0.491139311353
0.532304354436 0.486931064003
0.553942716039 0.51953331907
0.627341495722 0.396617894317
0.454210189397 0.570214499065
0.327359895038 0.582972137899
0.422271372537 0.560892624101
0.443036148978 0.434529240506
0.644625936719 0.542486338813
0.447813648487 0.575896033203
0.534217713171 0.636123087401
0.348346109137 0.312959224746
0.484075186327 0.289521849258
0.588417643962 0.387831556678
0.613422176662 0.665770829308
0.60994411786 0.399778040078
0.425443751505 0.402619561402
0.504955932504 0.610015349003
0.402852203978 0.382379275531
0.387591801531 0.452180343665
$ python quadtree 0 0 1 1 3 3 data
0 0.0 0.0 0.25 0.25 (0.125, 0.125)
1 0.0 0.25 0.25 0.5 (0.125, 0.375)
2 0.25 0.0 0.5 0.25 (0.375, 0.125)
3 0.25 0.25 0.375 0.375 (0.348346109137, 0.312959224746)
4 0.25 0.375 0.375 0.5 (0.271957975882, 0.450460115198)
5 0.375 0.25 0.5 0.375 (0.467182120645, 0.2711963108815)
6 0.375 0.375 0.5 0.5 (0.414730976498, 0.417927105276)
7 0.0 0.5 0.25 0.75 (0.125, 0.625)
8 0.0 0.75 0.25 1.0 (0.125, 0.875)
9 0.25 0.5 0.375 0.625 (0.35050845205299996, 0.566236807663)
10 0.25 0.625 0.375 0.75 (0.3125, 0.6875)
11 0.375 0.5 0.5 0.625 (0.42752015467779997, 0.5665272218)
12 0.375 0.625 0.5 0.75 (0.4375, 0.6875)
13 0.25 0.75 0.5 1.0 (0.375, 0.875)
14 0.5 0.0 0.75 0.25 (0.625, 0.125)
15 0.5 0.25 0.625 0.375 (0.500192599714, 0.352420542886)
16 0.5 0.375 0.625 0.5 (0.5650506999425, 0.44322384960416666)
17 0.625 0.25 0.75 0.375 (0.6875, 0.3125)
18 0.625 0.375 0.75 0.5 (0.627069208861, 0.40965150374799997)
19 0.75 0.0 1.0 0.25 (0.875, 0.125)
20 0.75 0.25 1.0 0.5 (0.875, 0.375)
21 0.5 0.5 0.625 0.625 (0.5294493242714999, 0.5647743340365)
22 0.5 0.625 0.625 0.75 (0.5752450560886667, 0.6659543021696667)
23 0.625 0.5 0.75 0.625 (0.644625936719, 0.542486338813)
24 0.625 0.625 0.75 0.75 (0.6875, 0.6875)
25 0.5 0.75 0.75 1.0 (0.625, 0.875)
26 0.75 0.5 1.0 0.75 (0.875, 0.625)
27 0.75 0.75 1.0 1.0 (0.875, 0.875)
(0.37, 0.55) 9
As a result, it was divided into 28 regions (the number of leaf nodes in the tree).
Looking at the last row of the results, we can see that the region to which the given data point belongs is correctly determined.
Recommended Posts