# This demo is a brief overview of the geocoding documentation: https://github.com/JetBrains/lets-plot/blob/master/docs/geocoding.md

import shapely
from IPython.display import display, Markdown

from lets_plot import *
from lets_plot.geo_data import *

LetsPlot.setup_html()
# The geodata is provided by © OpenStreetMap contributors and is made available here under the Open Database License (ODbL).
def run_catching(f):
    """Call ``f`` expecting it to fail, and render the error message in red.

    Used throughout the demo to show the text of geocoding errors without
    aborting execution. If ``f`` completes without raising, the message
    'Error expected' is rendered instead, since the AssertionError raised
    here is caught by the same handler.

    :param f: zero-argument callable that is expected to raise.
    """
    def colored(s):
        # Wrap the text in a red <span> so it stands out in notebook output.
        return Markdown('<span style="color: #ff0000">{}</span>'.format(s))

    try:
        f()
        # Explicit raise instead of `assert` so the check is not stripped
        # when Python runs with -O.
        raise AssertionError('Error expected')
    except Exception as e:
        display(colored('{}'.format(e)))

# Basic usage

# Level autodetection
geocode(names=['florida', 'tx']).get_geocodes()

# Result may contain the following columns:
# id - for internal use. Only in geocodes DataFrame.
# request - column with lowest administrative level. The request belongs to this level. Can be a city/county/state/country.
# parents - if provided. Can be a county/state/country.
# found name - name that was found by geocoding.
# geometry - only in geometry DataFrame.

# Thanks to the request column it's obvious that the level was detected as state.
# Explicit level
geocode_states(['florida', 'tx']).get_geocodes()
# Parameters can be changed between searches
florida = geocode_states('florida')

display(florida.countries('usa').get_geocodes())
display(florida.countries('uruguay').get_geocodes())
display(florida.countries(None).get_geocodes())

# Scope

# str scope uses level autodetection.
# NB: Florida in USA is the most relevant result.
#     We can't find Florida in Uruguay using only the name - parent (country or scope) is required.
geocode_states('florida').scope('uruguay').get_geocodes()
# Geocoder scope
uruguay = geocode_countries('uruguay')
geocode_states('florida').scope(uruguay).get_geocodes()
# Scope is a singleton. Collections are not allowed.
scope = ['uruguay']
run_catching(
    lambda: geocode_states('florida').scope(scope).get_geocodes()
)
# Geocoder with more than one entry is not allowed either.
scope = geocode_countries(['uruguay', 'usa'])
run_catching(
    lambda: geocode_states('florida').scope(scope).get_geocodes()
)
# str scope can be ambiguous.
run_catching(
    lambda: geocode_cities('worcester').scope('worcester county').get_geocodes()
)

# Let's geocode Worcester County the way the service does - using level detection without parents.
# In fact Worcester County was found, but the error message in case of parent ambiguity is not clear. We will improve it.
run_catching(
    lambda: geocode_counties('worcester county').get_geocodes()
)

# Parents

# Parents should have the same length as names
geocode_cities(['warwick', 'worcester'])\
    .counties(['Worth County', 'worcester county'])\
    .states(['georgia', 'massachusetts'])\
    .get_geocodes()
# Parents can contain None items (e.g., countries with different administrative divisions).
geocode_cities(['warwick', 'worcester'])\
    .states(['Georgia', None])\
    .countries(['USA', 'United Kingdom'])\
    .get_geocodes()
# Geocoder object can be used as parent. Number of entries should be the same as the number of names.
s = geocode_states(['vermont', 'georgia']).scope('usa')
display(s.get_geocodes())

# NB: Parent request will be present in the result as a column.
display(geocode_cities(['worcester', 'warwick']).states(s).get_geocodes())
# counties and states can be combined with scope. scope acts as a top level parent.
geocode_counties(['Dakota County', 'Nevada County']).states(['NE', 'AR']).scope('USA').get_geocodes()
# scope can't be combined with countries - geocoding won't try to guess what level it is
run_catching(
    lambda: geocode_counties('Nevada County').countries('usa').scope('Arizona').get_geocodes()
)
# Parents and names should have the same length
run_catching(
    lambda: geocode_states(['florida', 'rivera']).countries('uruguay').get_geocodes()
)
# Same for Geocoder
countries = geocode_countries('uruguay')
run_catching(
    lambda: geocode_states(['florida', 'rivera']).countries(countries).get_geocodes()
)

# Ignoring an ambiguity

# Ambiguous result generates an error:
run_catching(
    lambda: geocode_cities(['warwick', 'worcester']).get_geocodes()
)
# Ambiguous result can be converted to a matching result (e.g., for drawing on a map)
geocode_cities(['warwick', 'worcester']).allow_ambiguous().get_geocodes()
# Missing name gives an error
run_catching(
    lambda: geocode_cities(names=['paris', 'worcester', 'foo']).get_geocodes()
)

# Missing parent also gives an error
run_catching(
    lambda: geocode_cities('paris').countries('foo').get_geocodes()
)
# ignore_not_found() - ignore unknown names, keep everything else, including ambiguous names
run_catching(
    lambda: geocode_cities(['paris', 'worcester', 'foo']).ignore_not_found().get_geocodes()
)

# Missing parent also gives an error
run_catching(
    lambda: geocode_cities(['paris', 'worcester']).countries(['foo', None]).ignore_not_found().get_geocodes()
)
# ignore_all_errors() - keep only exactly matched names
geocode_cities(['paris', 'worcester', 'foo']).ignore_all_errors().get_geocodes()

geocode_cities(['paris', 'worcester']).countries(['france', 'foo']).ignore_all_errors().get_geocodes()
# ignore_not_found() + allow_ambiguous() - see all ambiguous names without "not found" error
geocode_cities(['paris', 'worcester', 'foo']).ignore_not_found().allow_ambiguous().get_geocodes()
# List only the first 10 distinct not found names
run_catching(
    lambda: geocode_cities([
        'foo', 'foo', 'foo', 'foo4', 'foo5', 'foo6', 'foo7', 'foo8',
        'foo9', 'foo10', 'foo11', 'foo12', 'foo13', 'foo14', 'foo15',
    ]).get_geocodes()
)
# Empty DataFrame if no matching names are left
geocode_cities('worcester').ignore_all_errors().get_geocodes()

# where() function

# Take the object closest to a place.
boston = geocode_cities('boston')
geocode_cities('worcester').where('worcester', closest_to=boston).get_geocodes()
# Take the object closest to a coordinate.
boston_coord = boston.get_centroids().geometry[0]
geocode_cities('worcester').where('worcester', closest_to=boston_coord).get_geocodes()
# Or take the object within a rectangular area
geocode_cities('worcester')\
    .where('worcester', scope=shapely.geometry.box(-71.00, 42.00, -72.00, 43.00))\
    .get_geocodes()
# Or by defining a query scope. In this case the name from the scope will not go into the result DataFrame
massachusetts = geocode_states('massachusetts')
geocode_cities('worcester').where('worcester', scope=massachusetts).get_geocodes()
# Query scope can also be a string
geocode_cities('worcester').where('worcester', scope='massachusetts').get_geocodes()
# Query scope overrides parents while keeping parents in the result dataframe.
worcester_county = geocode_counties('Worcester County').states('massachusetts').countries('usa')

geocode_cities(['worcester', 'worcester'])\
    .countries(['USA', 'United Kingdom'])\
    .where('worcester', country='USA', scope=worcester_county)\
    .get_geocodes()
# Query scope should contain a single object
countries = geocode_countries(['usa', 'uruguay'])
run_catching(
    lambda: geocode_states('florida').where('florida', scope=countries).get_geocodes()
)
# NB: Parent is used only for searching the exact row in the request.
#     It doesn't modify any parent (neither existing nor empty).
run_catching(
    lambda: geocode_cities('worcester')\
        .countries('USA')\
        .where('worcester', country='USA', state='iowa', county='worcester county')\
        .get_geocodes()
)
geocode_cities('warwick') \
    .where('warwick', scope=shapely.geometry.box(-72, 41.5, -71, 42)) \
    .allow_ambiguous() \
    .get_geocodes()

# Error handling

# Each request below is expected to fail; run_catching renders the error text.

# The parent ('foo') cannot be found.
run_catching(lambda: geocode_states('florida').countries('foo').get_geocodes())

# The parent (Worcester County) is ambiguous. A better message is required.
run_catching(lambda: geocode_cities('worcester').counties('worcester county').scope('usa').get_geocodes())

# 'us-48' is not available at a non-state level.
run_catching(lambda: geocode_counties('us-48').get_geocodes())

# Geocoding and geoms

cities = geocode_cities(['boston', 'new york'])
p = ggplot() + ggsize(300, 200)
# A geocoder object can be used as the map parameter to simply display a geometry
plots = GGBunch()
plots.add_plot(p + geom_map(map=cities, fill='gray') + ggtitle('geom_map()'), 0, 0)
plots.add_plot(p + geom_rect(map=cities, fill='gray') + ggtitle('geom_rect()'), 300, 0)
plots.add_plot(p + geom_point(map=cities) + ggtitle('geom_point()'), 600, 0)
plots
# A GeoDataFrame can also be used as the map parameter to display a geometry - syntax is the same as with Geocoder.
# It is useful for optimisation - geocoder caches geocodes, but doesn't cache geometries.
centroids = cities.get_centroids()
bboxes = cities.get_limits()
boundaries = cities.get_boundaries()
p = ggplot() + ggsize(300, 200)
plots = GGBunch()
# Reuse the geometries fetched above instead of requesting them a second time.
plots.add_plot(p + geom_map(map=boundaries, fill='gray') + ggtitle('geom_map()'), 0, 0)
plots.add_plot(p + geom_rect(map=bboxes, fill='gray') + ggtitle('geom_rect()'), 300, 0)
plots.add_plot(p + geom_point(map=centroids) + ggtitle('geom_point()'), 600, 0)
plots

# map and map_join

# map_join allows joining data and geometry.
# To make it more difficult the demo data contains cities with the same name (Worcester).
# Also there is a city and a state with the same name (New York).
# All names are in lower case to distinguish user input from the geocoding result.
import pandas  # bind the module name explicitly; the star import below may not provide `pandas`
from pandas import *
d = pandas.DataFrame({
    'City_Name': ['boston', 'new york', 'worcester', 'worcester'],
    'State_Name': ['massachusetts', 'new york', 'vermont', 'massachusetts'],
    'mean': [523, 556, 600, 533]
})

geocoder = geocode_cities(d.City_Name).states(d.State_Name)
geocoder.get_geocodes()
# Cache boundaries
background_states = geocode_states(['massachusetts', 'new york', 'vermont']).inc_res().get_boundaries()

def draw_plot(map, map_join):
    """Build a plot of the demo cities over the cached state boundaries.

    Draws ``background_states`` with ``geom_map`` and then the rows of the
    module-level DataFrame ``d`` as points, sized by 'mean' and colored by
    'City_Name'. Axis decorations are blanked out.

    :param map: value passed to ``geom_point(map=...)``; the demo passes
        either a Geocoder or a GeoDataFrame.
    :param map_join: value passed to ``geom_point(map_join=...)`` to match
        data columns with map columns.
    :return: the composed plot specification.
    """
    return ggplot() + \
        geom_map(map=background_states) + \
        geom_point(aes(size='mean', color='City_Name'), data=d, map=map, map_join=map_join) + \
        theme(axis_line='blank', axis_text='blank', axis_ticks='blank', axis_title='blank')
# Draw a GeoDataFrame with data.
# Names in a GeoDataFrame from Geocoder are predefined: 'city', 'county', 'state', 'country'
# Order of levels in map_join should match:
draw_plot(map=geocoder.get_centroids(), map_join=[['City_Name', 'State_Name'], ['city', 'state']])

# Note that both Worcesters have the proper position and data, but share the same color.
# To make the colors distinct a new column with a combination of city and state names can be used.
# With Geocoder it is much easier to draw data.
# Map columns will be generated in the following order: city, county, state, country. Unused levels will be omitted.
# Data columns should follow this order.
draw_plot(map=geocoder, map_join=['City_Name', 'State_Name'])
# Not following the order leads to an unexpected result:
draw_plot(map=geocoder, map_join=['State_Name', 'City_Name'])
us48 = geocode_states('us-48').inc_res()
p = ggplot() + \
    theme(axis_line='blank', axis_text='blank', axis_ticks='blank', axis_title='blank', legend_position='none') + \
    ggsize(600, 300)
# Geocoder can be passed to the data parameter. In this case the column 'found name' can be used for join and styling:
p + \
    geom_map(aes(fill='found name'), data=us48, map=us48, map_join='found name', tooltips=layer_tooltips().line('@{found name}'))
# With a GeoDataFrame as data the plot spec is even more compact
p + \
    geom_map(aes(fill='found name'), data=us48.get_boundaries(), tooltips=layer_tooltips().line('@{found name}'))
# map_join works fine even when data and map rows don't match

# For simplicity the states from us-48 are re-used. Names can be provided by the user.
import random
random.seed(1)  # fixed seed so the sampled states and values are reproducible
area_of_interest = us48.get_geocodes().state.tolist()
length = 30
mean_by_state = {
    'State_Name': random.sample(area_of_interest, length),
    'Mean_Value': random.sample(range(0, 500), length)
}

p + geom_map(
    aes(fill='Mean_Value'),
    data=mean_by_state, map=us48,
    map_join='State_Name',
    tooltips=layer_tooltips()
        .line('@{found name}')
        .line('mean:|@Mean_Value')
)

# Note the variable 'found name' that is used in the tooltip.
# Thanks to map_join this variable is available to the tooltip processor.