https://www.geonames.org から取れる、人口500人以上の都市の名前に限定すると、
# Find, for each lowercase ASCII letter, the GeoNames cities (population
# >= 500) whose ASCII name contains that letter the most times.
import logging
import tempfile
import zipfile
from collections import Counter

import httpx

FILE_NAME_BASE = 'cities500'
# Column names applied, in order, to the tab-separated fields of each line of
# the downloaded dump.
GEONAME_FIELDS = (
    'geoname_id',
    'name',
    'ascii_name',
    'alternate_names',
    'latitude',
    'longitude',
    'feature_class',
    'feature_code',
    'country_code',
    'cc2',
    'admin1_code',
    'admin2_code',
    'admin3_code',
    'admin4_code',
    'population',
    'elevation',
    'dem',
    'timezone',
    'modification_date',
)


def retrieve_cities():
    """Retrieve city names from a remote server.

    Downloads ``{FILE_NAME_BASE}.zip`` from download.geonames.org, unpacks
    the ``.txt`` member into a temporary directory and yields one list of
    tab-separated field strings per line of that file.

    Raises:
        httpx.HTTPStatusError: if the server answers with an error status.
    """
    response = httpx.get(
        f'https://download.geonames.org/export/dump/{FILE_NAME_BASE}.zip'
    )
    response.raise_for_status()

    # The context manager removes the directory as soon as the generator is
    # exhausted or closed, instead of relying on garbage collection.
    with tempfile.TemporaryDirectory() as tmpdir:
        zip_path = f'{tmpdir}/{FILE_NAME_BASE}.zip'
        with open(zip_path, 'wb') as f:
            f.write(response.content)
        with zipfile.ZipFile(zip_path, 'r') as z:
            # Only the data file is needed; skip any other archive members.
            z.extract(f'{FILE_NAME_BASE}.txt', tmpdir)
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        with open(f'{tmpdir}/{FILE_NAME_BASE}.txt', 'r', encoding='utf-8') as f:
            for line in f:
                # Strip the line terminator so the last field stays clean.
                yield line.rstrip('\n').split('\t')


def count_characters(to_check='ascii_name', filter_func=lambda _: True):
    """Count characters in city names.

    Args:
        to_check: key of the city field whose characters are counted.
        filter_func: predicate called with the city dict; cities for which
            it returns a falsy value are skipped.

    Returns:
        A dict mapping each city's ``geoname_id`` to
        ``{'characters': Counter, 'city': city_dict}``.
    """
    cities = {}
    for city_fields in retrieve_cities():
        city = dict(zip(GEONAME_FIELDS, city_fields))
        if not filter_func(city):
            continue
        cities[city['geoname_id']] = {
            'characters': Counter(city[to_check]),
            'city': city,
        }
    return cities


def count_chars_of_city_names(cities, char=None):
    """Find the cities with the most occurrences of a given character.

    Args:
        cities: mapping as returned by ``count_characters``.
        char: the character to look for.  When ``None``, the character is
            chosen automatically: it is the most common character of the
            first city that reaches the overall highest per-city count.

    Returns:
        A ``(cities, char)`` tuple: the list of city entries in which
        ``char`` occurs the maximum number of times (each entry once, in
        the iteration order of ``cities``) and the character itself.
        ``([], None)`` when no city contains the character.
    """
    max_count = 0
    max_count_char = None

    # First pass: determine the winning character and its highest count.
    for city_id, data in cities.items():
        characters = data.get('characters')
        if not characters:
            logging.debug(
                'No characters found for city %s: %r', city_id, data
            )
            continue
        if char is None:
            # Use a local name so the automatic mode is not silently turned
            # into a fixed-character search after the first city.
            candidate, count = characters.most_common(1)[0]
        else:
            candidate, count = char, characters.get(char, 0)
        if count > max_count:
            max_count = count
            max_count_char = candidate

    if max_count == 0:
        return [], None

    # Second pass: collect every city that reaches the maximum exactly once.
    winners = [
        data
        for data in cities.values()
        if data.get('characters')
        and data['characters'].get(max_count_char, 0) == max_count
    ]
    return winners, max_count_char


def not_contain_invalid_chars(city):
    """Return True if the city's ASCII name contains neither '(' nor '/'."""
    ascii_name = city.get('ascii_name', '')
    return '(' not in ascii_name and '/' not in ascii_name


def main():
    """Print, for every lowercase ASCII letter, the cities that use it most."""
    cities = count_characters(filter_func=not_contain_invalid_chars)
    for letter in 'abcdefghijklmnopqrstuvwxyz':
        cities_counted, char = count_chars_of_city_names(cities, letter)
        if not cities_counted:
            # No city name contains this letter; nothing to index into.
            print(f'The character "{letter}" does not appear in any city name.')
            continue
        max_count = cities_counted[0]['characters'][char]
        print(
            f'The character "{char}" appears the most ({max_count} times) '
            'in the following cities:'
        )
        for city in cities_counted:
            print("\t", city['city']['ascii_name'])


if __name__ == "__main__":
    main()
Ulaanbaatar(ウランバートル)でいいの?
https://www.geonames.org から取れる、人口500人以上の都市の名前に限定すると、 La Calzada de Calatrava が8文字の `a` を含んで最大。 import tempfileimport zipfilefrom collections import Counterimport httpxFILE_NAME_...
じゃあ任意の文字が最も多い割合で含まれる都市は……?
津 終了
志布志市志布志町
ランヴァイル・プルグウィンギル・ゴゲリフウィルンドロブル・ランティシリオゴゴゴホ(Llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogoch)