This demo gives a brief overview of the geocoding documentation: https://github.com/JetBrains/lets-plot/blob/master/docs/geocoding.md
import shapely
from IPython.display import display, Markdown
from lets_plot import *
from lets_plot.geo_data import *
LetsPlot.setup_html()
The geodata is provided by © OpenStreetMap contributors and is made available here under the Open Database License (ODbL).
def run_catching(f):
    """Run ``f`` and display the error it is expected to raise in red.

    Helper for demo cells that trigger an error on purpose: ``f`` is called
    with no arguments and the message of the exception it raises is displayed
    as red Markdown text. If ``f`` returns without raising, the text
    'Error expected' is displayed in the same style instead.

    Args:
        f: Zero-argument callable that is expected to raise an exception.
    """
    def colored(s):
        # Wrap the text in a red <span> and hand it to Markdown for rendering.
        return Markdown('<span style="color: #ff0000">{}</span>'.format(s))

    try:
        f()
    except Exception as e:
        display(colored('{}'.format(e)))
    else:
        # Reported explicitly instead of via ``assert``: assert statements are
        # removed under ``python -O``, which would let an unexpected success
        # pass silently.
        display(colored('Error expected'))
Basic usage
# Level autodetection: geocode() is called without an explicit level.
geocode(names=['florida', 'tx']).get_geocodes()
# The result may contain the following columns:
# id - for internal use. Only in the geocodes DataFrame.
# request - column for the lowest administrative level; the request belongs to this level. Can be a city/county/state/country.
# parents - if provided. Can be a county/state/country.
# found name - the name that was found by geocoding.
# geometry - only in the geometry DataFrame.
# Thanks to the request column it's obvious that the level was detected as state.
# Explicit level
geocode_states(['florida', 'tx']).get_geocodes()
# Parameters can be changed between searches
florida = geocode_states('florida')
display(florida.countries('usa').get_geocodes())
display(florida.countries('uruguay').get_geocodes())
display(florida.countries(None).get_geocodes())
Scope
# A str scope uses level autodetection.
# NB: Florida in the USA is the most relevant result.
# We can't find Florida in Uruguay using only the name - a parent (country or scope) is required.
geocode_states('florida').scope('uruguay').get_geocodes()
# Geocoder scope
uruguay = geocode_countries('uruguay')
geocode_states('florida').scope(uruguay).get_geocodes()
# Scope is a singleton. Collections are not allowed.
scope = ['uruguay']
run_catching(
lambda: geocode_states('florida').scope(scope).get_geocodes()
)
# A Geocoder with more than one entry is not allowed either.
scope = geocode_countries(['uruguay', 'usa'])
run_catching(
lambda: geocode_states('florida').scope(scope).get_geocodes()
)
# A str scope can be ambiguous.
run_catching(
lambda: geocode_cities('worcester').scope('worcester county').get_geocodes()
)
# Let's geocode Worcester County the same way the service does - using level detection without parents.
# In fact Worcester County was found, but the error message in case of parent ambiguity is not clear. We will improve it.
run_catching(
lambda: geocode_counties('worcester county').get_geocodes()
)
Parents
# Parents should have the same length as names
geocode_cities(['warwick', 'worcester'])\
.counties(['Worth County', 'worcester county'])\
.states(['georgia', 'massachusetts'])\
.get_geocodes()
# Parents can contain None items (e.g., countries with different administrative divisions).
geocode_cities(['warwick', 'worcester'])\
.states(['Georgia', None])\
.countries(['USA', 'United Kingdom'])\
.get_geocodes()
# A Geocoder object can be used as a parent. The number of entries should be the same as the number of names.
s = geocode_states(['vermont', 'georgia']).scope('usa')
display(s.get_geocodes())
# NB: The parent request will be present in the result as a column.
display(geocode_cities(['worcester', 'warwick']).states(s).get_geocodes())
# counties and states can be combined with scope. scope acts as a top-level parent.
geocode_counties(['Dakota County', 'Nevada County']).states(['NE', 'AR']).scope('USA').get_geocodes()
# scope can't be combined with countries - geocoding won't try to guess what level it is
run_catching(
lambda: geocode_counties('Nevada County').countries('usa').scope('Arizona').get_geocodes()
)
# Parents and names should have the same length
run_catching(
lambda: geocode_states(['florida', 'rivera']).countries('uruguay').get_geocodes()
)
# The same applies to a Geocoder used as a parent
countries = geocode_countries('uruguay')
run_catching(
lambda: geocode_states(['florida', 'rivera']).countries(countries).get_geocodes()
)
Ignoring an ambiguity
# An ambiguous result generates an error:
run_catching(
lambda: geocode_cities(['warwick', 'worcester']).get_geocodes()
)
# An ambiguous result can be converted to a matching result (e.g., for drawing on a map)
geocode_cities(['warwick', 'worcester']).allow_ambiguous().get_geocodes()
# A missing name gives an error
run_catching(
lambda: geocode_cities(names=['paris', 'worcester', 'foo']).get_geocodes()
)
# A missing parent also gives an error
run_catching(
lambda: geocode_cities('paris').countries('foo').get_geocodes()
)
# ignore_not_found() - ignore unknown names, keep everything else, including ambiguous names
run_catching(
lambda: geocode_cities(['paris', 'worcester', 'foo']).ignore_not_found().get_geocodes()
)
# A missing parent also gives an error (here with ignore_not_found() applied)
run_catching(
lambda: geocode_cities(['paris', 'worcester']).countries(['foo', None]).ignore_not_found().get_geocodes()
)
# ignore_all_errors() - keep only exactly matched names
geocode_cities(['paris', 'worcester', 'foo']).ignore_all_errors().get_geocodes()
geocode_cities(['paris', 'worcester']).countries(['france', 'foo']).ignore_all_errors().get_geocodes()
# ignore_not_found() + allow_ambiguous() - see all ambiguous names without a "not found" error
geocode_cities(['paris', 'worcester', 'foo']).ignore_not_found().allow_ambiguous().get_geocodes()
# Only the first 10 distinct not-found names are listed
run_catching(
lambda: geocode_cities(['foo', 'foo', 'foo', 'foo4', 'foo5', 'foo6', 'foo7', 'foo8', 'foo9', 'foo10', 'foo11', 'foo12', 'foo13', 'foo14', 'foo15']).get_geocodes()
)
# An empty DataFrame is returned if no matching names are left
geocode_cities('worcester').ignore_all_errors().get_geocodes()
where() function
# Take the object closest to a place.
boston = geocode_cities('boston')
geocode_cities('worcester').where('worcester', closest_to=boston).get_geocodes()
# Take the object closest to a coordinate.
boston_coord = boston.get_centroids().geometry[0]
geocode_cities('worcester').where('worcester', closest_to=boston_coord).get_geocodes()
# Or take the object within a rectangular area
# NOTE(review): shapely's box() takes (minx, miny, maxx, maxy); the x bounds below are
# passed in descending order (-71.00, then -72.00), unlike the later warwick example - confirm this is intended.
geocode_cities('worcester')\
.where('worcester', scope=shapely.geometry.box(-71.00, 42.00, -72.00, 43.00))\
.get_geocodes()
# Or by defining a query scope. In this case the name from the scope will not go into the result DataFrame
massachusetts = geocode_states('massachusetts')
geocode_cities('worcester').where('worcester', scope=massachusetts).get_geocodes()
# The query scope can also be a string
geocode_cities('worcester').where('worcester', scope='massachusetts').get_geocodes()
# The query scope overrides parents while keeping parents in the result DataFrame.
worcester_county=geocode_counties('Worcester County').states('massachusetts').countries('usa')
geocode_cities(['worcester', 'worcester'])\
.countries(['USA', 'United Kingdom'])\
.where('worcester', country='USA', scope=worcester_county)\
.get_geocodes()
# The query scope should contain a single object
countries = geocode_countries(['usa', 'uruguay'])
run_catching(
lambda: geocode_states('florida').where('florida', scope=countries).get_geocodes()
)
# NB: A parent is used only for finding the exact row in the request.
# It doesn't modify any parent (neither existing nor empty).
run_catching(
lambda: geocode_cities('worcester')\
.countries('USA')\
.where('worcester', country='USA', state='iowa', county='worcester county')\
.get_geocodes()
)
# A rectangular where() scope combined with allow_ambiguous()
geocode_cities('warwick') \
.where('warwick', scope=shapely.geometry.box(-72, 41.5, -71, 42)) \
.allow_ambiguous() \
.get_geocodes()
Error handling
# Failed to find the parent
run_catching(
lambda: geocode_states('florida').countries('foo').get_geocodes()
)
# Ambiguous parent - Worcester County. A better message is required.
run_catching(
lambda : geocode_cities('worcester').counties('worcester county').scope('usa').get_geocodes()
)
# There is no us-48 at a non-state level
run_catching(
lambda: geocode_counties('us-48').get_geocodes()
)
Geocoding and geoms
cities = geocode_cities(['boston', 'new york'])
p = ggplot() + ggsize(300, 200)
# A Geocoder object can be passed as the `map` parameter to simply display a geometry.
plots = GGBunch()
plots.add_plot(p + geom_map(map=cities, fill='gray') + ggtitle('geom_map()'), 0, 0)
plots.add_plot(p + geom_rect(map=cities, fill='gray') + ggtitle('geom_rect()'), 300, 0)
plots.add_plot(p + geom_point(map=cities) + ggtitle('geom_point()'), 600, 0)
plots
# A GeoDataFrame can also be used as the `map` parameter to display a geometry - the syntax is the same as with a Geocoder.
# It is useful for optimisation - the geocoder caches geocodes, but doesn't cache geometries.
# Fetch each geometry once and reuse the resulting GeoDataFrames in the plots below.
centroids = cities.get_centroids()
bboxes = cities.get_limits()
boundaries = cities.get_boundaries()
p = ggplot() + ggsize(300, 200)
plots = GGBunch()
plots.add_plot(p + geom_map(map=boundaries, fill='gray') + ggtitle('geom_map()'), 0, 0)
plots.add_plot(p + geom_rect(map=bboxes, fill='gray') + ggtitle('geom_rect()'), 300, 0)
plots.add_plot(p + geom_point(map=centroids) + ggtitle('geom_point()'), 600, 0)
plots
map and map_join
# map_join allows joining data with geometry.
# To make it more difficult, the demo data contains cities with the same name (Worcester).
# There is also a city and a state with the same name (New York).
# All names are in lower case to distinguish user input from the geocoding result.
# Bind the `pandas` name explicitly: `from pandas import *` is not guaranteed to export
# the module object itself, so `pandas.DataFrame` below must not rely on it.
import pandas
# The star import is kept so that bare pandas names remain available to code relying on them.
from pandas import *
d = pandas.DataFrame({
    'City_Name': ['boston', 'new york', 'worcester', 'worcester'],
    'State_Name': ['massachusetts', 'new york', 'vermont', 'massachusetts'],
    'mean': [523, 556, 600, 533]
})
geocoder = geocode_cities(d.City_Name).states(d.State_Name)
geocoder.get_geocodes()
# Cache boundaries
background_states = geocode_states(['massachusetts', 'new york', 'vermont']).inc_res().get_boundaries()
def draw_plot(map, map_join):
    """Compose the demo plot: state boundaries with data points drawn on top.

    The background layer uses the module-level ``background_states`` and the
    point layer uses the module-level DataFrame ``d``; point size is mapped to
    the 'mean' column and color to the 'City_Name' column.

    Args:
        map: Value forwarded to ``geom_point`` as its ``map`` argument.
        map_join: Value forwarded to ``geom_point`` as its ``map_join`` argument.

    Returns:
        The result of adding the map layer, the point layer and the theme
        to ``ggplot()``.
    """
    plot = ggplot()
    # Background layer: state boundaries.
    plot = plot + geom_map(map=background_states)
    # Data layer: one point per row of `d`, joined to the geometry via map_join.
    point_mapping = aes(size='mean', color='City_Name')
    plot = plot + geom_point(point_mapping, data=d, map=map, map_join=map_join)
    # Hide all axis decorations.
    plot = plot + theme(axis_line='blank', axis_text='blank', axis_ticks='blank', axis_title='blank')
    return plot
# Draw a GeoDataFrame with data.
# Names in a GeoDataFrame from a Geocoder are predefined: 'city', 'county', 'state', 'country'
# The order of levels in map_join should match:
draw_plot(map=geocoder.get_centroids(), map_join=[['City_Name', 'State_Name'], ['city', 'state']])
# Note that the Worcesters have the proper position and data, but share the same color.
# To make the colors distinct, a new column with a combination of city and state names can be used.
# With a Geocoder it is much easier to draw data.
# Map columns will be generated in the following order: city, county, state, country. Unused levels will be omitted.
# Data columns should follow this order.
draw_plot(map=geocoder, map_join=['City_Name', 'State_Name'])
# Not following the order leads to an unexpected result:
draw_plot(map=geocoder, map_join=['State_Name', 'City_Name'])
# Geocoder for 'us-48' at state level, reused as both data and map below.
us48 = geocode_states('us-48').inc_res()
# Base plot: blank axes, no legend, fixed size.
p = ggplot() + \
theme(axis_line='blank', axis_text='blank', axis_ticks='blank', axis_title='blank', legend_position='none') + \
ggsize(600, 300)
# A Geocoder can be passed to the data parameter. In this case the column 'found name' can be used for join and styling:
p + \
geom_map(aes(fill='found name'), data=us48, map=us48, map_join='found name', tooltips=layer_tooltips().line('@{found name}'))
# With a GeoDataFrame as data, the plot spec is even more compact
p + \
geom_map(aes(fill='found name'), data=us48.get_boundaries(), tooltips=layer_tooltips().line('@{found name}'))
# map_join works fine even when data and map rows don't match
# For simplicity I'll re-use states from us-48. Names can be provided by the user.
import random
# Fixed seed so that the sampled states and values are the same on every run.
random.seed(1)
area_of_interest = us48.get_geocodes().state.tolist()
# Number of states (and values) to sample.
length = 30
mean_by_state = {
    'State_Name': random.sample(area_of_interest, length),
    'Mean_Value': random.sample(range(0, 500), length)
}
p + geom_map(
aes(fill='Mean_Value'),
data=mean_by_state, map=us48,
map_join='State_Name',
tooltips=layer_tooltips()
.line('@{found name}')
.line('mean:|@Mean_Value')
)
# Note the variable 'found name' that is used in the tooltip.
# Thanks to map_join this variable is available to the tooltip processor.