IMet Word Embedding

From: https://www.kaggle.com/kokecacao/imet-word-embedding

Author: Hanke Chen

IMet Word Embedding

Here are my tests for using NLP to encode labels. Hope you find it helpful.

In [1]:
import numpy as np
import pandas as pd

from tqdm import tqdm_notebook as tqdm

GLOVE = '../input/glove840b300dtxt/glove.840B.300d.txt' #'../input/glove.840B.300d.txt'
LABELS = '../input/imet-2019-fgvc6/labels.csv'
TRAIN = '../input/imet-2019-fgvc6/train.csv'
TRAIN_IMG = '../input/imet-2019-fgvc6/train/{}.png'
In [2]:
def get_coefs(word, *arr):
    return word, np.asarray(arr, dtype='float32')
def load_embeddings(path):
    with open(path) as f:
        return dict(get_coefs(*line.strip().split(' ')) for line in tqdm(f))
In [3]:
all_words = ["abruzzi","achaemenid","aegean","afghan","after british","after german","after german original","after italian","after russian original","akkadian","alexandria-hadra","algerian","alsace","american","american or european","amsterdam","ansbach","antwerp","apulian","arabian","aragon","arica","asia minor","assyrian","atlantic watershed","attic","augsburg","augsburg decoration","augsburg original","austrian","avignon","avon","aztec","babylonian","babylonian or kassite","bactria-margiana archaeological complex","balinese","bavaria","bayreuth","beautiran","beauvais","belgian","berlin","birmingham","boeotian","bohemian","bologna","bordeaux","bow","brescia","bristol","british","british or french","british or scottish","brunswick","brussels","burma","burslem","byzantine","calima","cambodia","campanian","canaanite","canosan","castel durante","catalan","catalonia","caucasian","caughley","central asia","central european","central highlands","central italian","chalcidian","chantilly","chaumont-sur-loire","chelsea","chelsea-derby","chimu","china","chinese with dutch decoration","chinese with european decoration","chinese with french mounts","chiriqui","chorrera","chupicuaro","colima","colombian","colonial","colonial american","copenhagen","corinthian","coromandel coast","costa rica","costa rica or panama","cretan","crete","cyclades","cycladic","cypriot","cypriot or phoenician","czech","danish","deccan","dehua","delft","derby","deruta","devonshire","dresden","dublin","dutch","dyak","east greek","east greek/sardis","eastern european","eastern mediterranean","eastern mediterranean or italian","edinburgh","edomite","egypt","egyptian","elamite","england","etruria","etruscan","euboean","european","european bronze age","faliscan","ferrara","flemish","flemish or italian","florence","for american market","for british market","for continental market","for danish market","for european market","for french market","for iberian market","for portuguese market","for russian market","for swedish market","frankenthal","frankish","freiburg im breisgau","french","french or german","french or italian","french or swiss","fulda","furstenberg","gaul","geneva","genoa","german","german or swiss","ghassulian","gnathian","gonia","greek","greek islands","greek or roman","guanacaste-nicoya","gubbio","gurkha","haida","hanau","hattian","helladic","hilt","hittite","hochst","huastec","hungarian","hungary","huron","ica","inca","india","indian or nepalese","indonesia","inuit","iran","irish","isin-larsa","isin-larsaold babylonian","islamic","italian","italian or sicilian","italian or spanish","italic","italic-native","japan","javanese","jouy-en-josas","kathmandu valley","kazakhstan","kholmogory","kievan rus'","konigsberg","korea","la rochelle","laconian","lambayeque","lambeth","langobardic","leuven","lille","limoges","liverpool","london","london original","longton hall","lowestoft","ludwigsburg","lydian","lyons","macao","macaracas","macedonian","madrid","malayan","mali","manteno","maya","meissen","meissen with german","mennecy","mennecy or sceaux","mexican","mezcala","michoacan","milan","mimbres","minoan","mitanni","mixtec","moche","moche-wari","montelupo","moro","moroccan","moustiers","mughal","muisca","munich","mycenaean","nabataean","nailsea","nantes","naples","nasca","naxos","nayarit","neo-sumerian","neolithic","nepal","netherlandish","neuwied am rhein","nevers","nimes","north china","north indian","north italian","north netherlandish","northern european","northern india","northern italian","northwest china","northwest china/eastern central asia","norwegian","nuremberg","nymphenburg","old assyrian trading colony","olmec","orleans","ottonian","padua","pakistan","palermo","paracas","paris","parita","parthian","parthian or sasanian","peruvian","pesaro","philippine","phrygian","piedmont","polish","populonia","portuguese","potsdam","praenestine","proto-elamite","provincial","ptolemaic","qajar","quechua","remojadas","rhenish","roman","roman egyptian","rome","rouen","russian","saint-cloud","salinar","salzburg","san sabastian","sasanian","savoy","saxony","scandinavian","sceaux","scottish","scythian","seleucid","seville","sevres","sheffield","sicily","siena","silesia","sinceny","skyros","smyrna","south german","south italian","south netherlandish","southall","southern german","spanish","spitalfields","sri lankan","st. petersburg","staffordshire","stockholm","stoke-on-trent","strasbourg","sulawesi","sumatran","sumerian","surrey","swedish","swiss","syrian","tairona","tarentine","teano","teotihuacan","thailand","thanjavur","the hague","thessaly","thuringia","tibet","tibetan","tiwanaku","tlatilco","tlingit","tolita-tumaco","topara","tsimshian","turin","turkish","turkish or venice","ubaid","umbria","united states","unknown","urartian","urbino","urbino with gubbio luster","valencia","venice","veracruz","veraguas","verona","versailles","vienna","vietnam","villanovan","villeroy","vincennes","visigothic","vulci","wari","west slavic","western european","worcester","wurzburg","zenu","zoroastrian","zurich","abbies","abraham","abstraction","acanthus","acorns","acrobats","actors","actresses","adam","admirals","adonis","adoration of the magi","adoration of the sheperds","air transports","alexander the great","altars","amazons","amulets","amun","ancient greek","angels","anger","animals","anklet","annunciation","aphrodite","apocalypse","apollo","apostles","apples","arabic","archangel gabriel","arches","architects","architectural elements","architectural fragments","architecture","ariadne","armors","army","arrowheads","arrows","artemis","artists","assumption of the virgin","astronomy","athena","athletes","autumn","avalokiteshvara","axes","bacchus","badges","bagpipes","bakers","balconies","bamboo","baptism of christ","barns","baseball","basins","bathing","bathsheba","bats","battles","beaches","beads","beakers","bears","bedrooms","beds","bees","belts","benches","benjamin franklin","bes","bible","bicycles","billiards","birds","bishops","boars","boats","bobbins","bodhisattva","bodies of water","body parts","books","boots","bottles","bow and arrow","bowls","boxes","boxing","boys","bracelets","bridges","brooches","buckles","buddha","buddhism","buddhist religious figures","buffalos","buildings","buildings and structures","bulls","burial grounds","burials","butterflies","buttons","cabinets","calendars","camels","cameos","canals","candelabra","candles","candlesticks","cannons","capitals","carpets and rugs","carriages","cartouches","caryatids","castles","cathedrals","cats","cauldrons","caves","celestial bodies","censers","centaurs","ceremony","ceres","chairs","chalices","chariots","chess","chests","chickens","children","chinese","chinoiserie","christ","christian imagery","christianity","christmas","churches","circles","circus","cities","civil war","cleopatra","clocks","clothing and accessories","clouds","coat of arms","coats","coffeepots","coffins","coins","columns","commodes","concerts","contemplation","coptic","cornucopia","corpses","correspondence","corsets","costumes","couches","couples","courtyards","coverlets and quilts","cows","crabs","cradles","cranes","crescents","crocodiles","cross","crowd","crucifixion","cuneiform","cupid","cups","curtains","cutlery","daggers","daily life","daisies","dance","dancers","dancing","david","dawn","death","decorative designs","decorative elements","deer","deities","demons","descent from the cross","deserts","design elements","desks","devil","diadems","diamonds","diana","dice","dining","dionysus","dishes","docks","doctors","documents","dogs","dolls","dolphins","domes","donkeys","doors","doorways","doves","dragons","drawing","dresses","drinking","drinking glasses","drums","drunkenness","ducks","durga","eagles","earrings","easter","egg and dart","elephants","emblems","embroidery","emperor augustus","entombment","eros","esther","europa","eve","evening","ewers","eyes","facades","faces","factories","fairies","falcons","family","fans","farmers","farms","fathers","fauns","fear","feathers","feet","female nudes","fire","firearms","fireplaces","fireworks","fish","fishing","flags","flowers","flutes","fluting","food","footwear","forests","fortification","fountains","foxes","friezes","frogs","fruit","funerals","funerary objects","furniture","gadrooning","galatea","games","gardeners","gardens","garlands","gates","generals","genre scene","geometric patterns","george washington","gingham pattern","girls","globes","gloves","goats","goblets","goddess","gods","grapes","greek deities","greek figures","griffins","grotesques","guitars","hair","hammers","hands","harps","hathor","hats","hawks","heads","hell","helmets","hercules","hermes","hexagons","hieroglyphs","hills","hilts","hindu religious figures","hinduism","historical figures","holofernes","holy family","horns","horse riding","horses","horus","hospitals","houses","human figures","hunting","illness","incense burners","infants","inns","inscriptions","insects","insignia","interiors","isis","jackets","jainism","jars","jason","jesus","jewelry","jockeys","journals","judith","jugs","julius caesar","juno","jupiter","kettles","keys","kings","kitchens","knives","krishna","lace","ladders","ladles","lakes","lambs","lamentation","lamps","landforms","landscapes","last judgement","last supper","law","leaves","leda","leopards","lighting","lions","literature","liturgical objects","living rooms","lizards","lobsters","lockets","lotuses","louis xiv","love","lovers","lutes","madonna and child","maenads","magicians","maitreya","male nudes","mandolins","manjushri","manuscripts","maps","mark antony","markets","mars","mary magdalene","masks","massacres","medallions","medea","men","merchants","mercury","mice","military","military clothing","military equipment","minerva","mirrors","monkeys","monks","monsters","monuments","moon","moses","mosques","mothers","mountains","muses","music","musical instruments","musicians","mythical creatures","mythology","napoleon i","nativity","navy","necklaces","necktie","neptune","nero","netsuke","new testament","night","nike","nonrepresentational art","nymphs","obelisks","occupations","octagons","octopus","old testament","olive trees","opera","organs","ornament","orpheus","owls","painting","paisley","palaces","palmettes","pants","parks","parrots","party","peaches","peacocks","pediments","pendants","pentecost","peonies","percussion instruments","performance","perseus","pheasants","pianos","pigeons","pigs","pilasters","pinecones","pins","pitchers","plants","playing","playing cards","pocket watches","poetry","poets","polka-dot pattern","pomegranates","ponds","popes","portraits","poseidon","princes","princesses","prisms","prisoners","prisons","profiles","prostitutes","psyche","punishment","purses","putti","pyramids","queens","qur'an","rabbits","railways","rain","rams","reading","rectangles","religious events","religious texts","reliquaries","riding","rings","rivers","roads","robes","roman deities","roosters","rosaries","roses","rowing","ruins","sadness","sailors","saint anne","saint anthony","saint catherine","saint francis","saint george","saint jerome","saint john the baptist","saint john the evangelist","saint joseph","saint lawrence","saint mark","saint matthew","saint michael","saint paul","saint peter","saints","samples","sarcophagus","satire","satyrs","saucers","scarabs","scarves","schools","scorpions","screens","scrolls","sculpture","seals","seas","seascapes","seating furniture","self-portraits","serpents","servants","shakespeare","shakyamuni","sheep","shells","shepherds","shields","ships","shirts","shiva","shoes","sibyl","silenus","singers","singing","skeletons","skirts","skulls","sky","slavery","sleep","smoking","snails","snakes","snow","soldiers","spears","spectators","sphinx","sports","spring","squares","squirrels","stairs","stars","still life","stools","storage furniture","storms","strapwork","street scene","streets","stripes","students","suffering","suits","summer","sun","sundials","sunflowers","swans","sword guards","swords","tabernacles","tables","tablets","taoism","tapestries","taweret","tea caddy","tea drinking","teachers","teapots","telescopes","temples","tents","textile fragments","textiles","theatre","tigers","tombs","tools and equipment","towers","towns","toys","trains","transportation","trays","trees","triangles","tricorns","triton","trophies","trumpets","tulips","tunics","tureens","turtles","undergarment","uniforms","urns","utilitarian objects","vajrapani","vase fragments","vases","vegetables","venus","vestments","vests","victory","villages","vines","violas","violins","virgin mary","vishnu","volcanoes","vulcan","wagons","walking","wars","washing","watches","waterfalls","watermills","waves","weapons","weights and measures","wells","wind","windmills","windows","wine","winter","women","working","world war i","worshiping","wreaths","writing","writing implements","writing systems","zeus","zigzag pattern","zodiac"]
In [4]:
def build_matrix(all_words, path, statistics=True, embedding_index=None):
    if embedding_index is None: embedding_index = load_embeddings(path)
    
    if statistics:
        good_words = 0
        bad_words = 0
        for word in all_words:
            try:
                embedding_index[word]
                good_words = good_words+1
            except Exception as e:
                bad_words = bad_words+1
        print("good {}, bad {}, percent {}".format(good_words, bad_words, good_words/(good_words+bad_words)))
    
    embedding_matrix = dict()
    unknown_words = []
    text = ""
    
    for i, word in enumerate(all_words):
        try:
            embedding_matrix[word] = (np.array(embedding_index[word])).tolist()
        except KeyError:
            if " " in word or "-" in word:
                try:
                    words = word.replace("for ", "").replace(" or ", " ").replace(" and ", " ").replace(" with ", " ").replace(" of ", " ").replace("the ", " ").split(" ")
                    while "" in words:
                        words.remove("")
                    embedding_matrix[word] = (np.array([embedding_index[word] for word in words]).mean(axis=0)).tolist()
#                     text = text + "\n" + str(words)
                    continue
                except KeyError:
                    try:
                        words = (word.replace("for ", "").replace(" or ", " ").replace(" and ", " ").replace(" with ", " ").replace(" of ", " ").replace("the ", " ").replace("'", "").replace("/", " ") + "[]").replace("ish[]", "[]").replace("s[]", "[]").replace("[]", "").split(" ")
                        while "" in words:
                            words.remove("")
                        embedding_matrix[word] = (np.array([embedding_index[word] for word in words]).mean(axis=0)).tolist()
                        text = text + "\n" + str(words)
                        continue
                    except KeyError:
                        try:
                            words = (word.replace("for ", "").replace(" or ", " ").replace(" and ", " ").replace(" with ", " ").replace(" of ", " ").replace("the ", " ").replace("'", "").replace("/", " ") + "[]").replace("ish[]", "[]").replace("s[]", "[]").replace("[]", "").replace("-", " ").split(" ")
                            while "" in words:
                                words.remove("")
                            embedding_matrix[word] = (np.array([embedding_index[word] for word in words]).mean(axis=0)).tolist()
                            text = text + "\n" + str(words)
                            continue
                        except KeyError:
                                try:
                                    words = (word.replace("for ", "").replace(" or ", " ").replace(" and ", " ").replace(" with ", " ").replace(" of ", " ").replace("the ", " ").replace("'", "").replace("/", " ") + "[]").replace("ish[]", "[]").replace("s[]", "[]").replace("[]", "").replace("-", "").split(" ")
                                    while "" in words:
                                        words.remove("")
                                    embedding_matrix[word] = (np.array([embedding_index[word] for word in words]).mean(axis=0)).tolist()
                                    text = text + "\n" + str(words)
                                    continue
                                except KeyError:
                                    unknown_words.append(word)
                                    continue
                        continue
            elif "ish" in word or "s" or "''" in word:
                try:
                    words = (word+"[]").replace("ish[]", "[]").replace("s[]", "[]").replace("[]", "").replace("'", "").split(" ")
                    while "" in words:
                        words.remove("")
                    embedding_matrix[word] = (np.array([embedding_index[word] for word in words]).mean(axis=0)).tolist()
                    text = text + "\n" + str(words)
                    continue
                except KeyError:
                    unknown_words.append(word)
                    continue
            unknown_words.append(word)
    print(text)
    return embedding_matrix, unknown_words
In [5]:
embedding_index = load_embeddings(GLOVE)

In [6]:
embedding_matrix, unknown_words = build_matrix(all_words, GLOVE, embedding_index=embedding_index)
good_count = len(embedding_matrix.keys())
bad_count = len(unknown_words)
print("good {}, bad {}, percent {}".format(good_count, bad_count, good_count/(good_count + bad_count)))
# print("good {}, bad {}, percent {}".format(len(embedding_matrix.keys())-len(unknown_words), len(unknown_words), (len(embedding_matrix)-len(unknown_words))/(len(embedding_matrix)+len(unknown_words))))
good 813, bad 290, percent 0.7370806890299184

['alexandria', 'hadra']
['chaumont', 'sur', 'loire']
['chelsea', 'derby']
['east', 'greek', 'sardi']
['italic', 'native']
['moche', 'wari']
['neo', 'sumerian']
['netherland']
['north', 'netherland']
['northwest', 'china', 'eastern', 'central', 'asia']
['rhen']
['saint', 'cloud']
['south', 'netherland']
['abbie']
['quran']
['tricorn']
good 1008, bad 95, percent 0.9138712601994561
In [7]:
unknown_words
Out[7]:
['achaemenid',
 'ansbach',
 'apulian',
 'babylonian or kassite',
 'bactria-margiana archaeological complex',
 'beautiran',
 'boeotian',
 'calima',
 'campanian',
 'canosan',
 'caughley',
 'chalcidian',
 'chimu',
 'chorrera',
 'chupicuaro',
 'dehua',
 'deruta',
 'dyak',
 'edomite',
 'elamite',
 'euboean',
 'faliscan',
 'frankenthal',
 'freiburg im breisgau',
 'ghassulian',
 'gnathian',
 'gonia',
 'guanacaste-nicoya',
 'gubbio',
 'hattian',
 'helladic',
 'hochst',
 'huastec',
 'isin-larsa',
 'isin-larsaold babylonian',
 'jouy-en-josas',
 'kholmogory',
 "kievan rus'",
 'konigsberg',
 'laconian',
 'lambayeque',
 'langobardic',
 'ludwigsburg',
 'macaracas',
 'manteno',
 'mennecy',
 'mennecy or sceaux',
 'mezcala',
 'mimbres',
 'mitanni',
 'mixtec',
 'montelupo',
 'moustiers',
 'muisca',
 'nabataean',
 'nailsea',
 'neuwied am rhein',
 'nymphenburg',
 'ottonian',
 'paracas',
 'parita',
 'parthian or sasanian',
 'populonia',
 'praenestine',
 'proto-elamite',
 'qajar',
 'remojadas',
 'salinar',
 'sasanian',
 'sceaux',
 'seleucid',
 'sinceny',
 'skyros',
 'tairona',
 'tarentine',
 'teano',
 'tiwanaku',
 'tlatilco',
 'tolita-tumaco',
 'topara',
 'tsimshian',
 'ubaid',
 'urartian',
 'urbino with gubbio luster',
 'veraguas',
 'villanovan',
 'visigothic',
 'vulci',
 'avalokiteshvara',
 'gadrooning',
 'holofernes',
 'manjushri',
 'shakyamuni',
 'taweret',
 'vajrapani']
In [8]:
embedding_index['quran']
Out[8]:
array([-0.79794  ,  0.038742 , -0.12058  ,  0.4975   ,  0.45576  ,
       -0.5694   ,  0.28138  , -0.14481  ,  0.26921  , -0.34068  ,
       -0.053981 , -0.013837 , -0.61084  ,  0.55639  , -0.61457  ,
       -0.029045 , -0.50692  ,  0.6186   ,  0.14699  , -0.44893  ,
       -0.24328  , -0.27776  ,  0.91669  ,  0.39548  ,  0.81715  ,
       -0.02661  , -0.39559  , -0.42562  ,  0.041734 ,  1.1318   ,
        0.3013   ,  0.106    , -0.049379 , -0.18498  ,  0.17004  ,
        0.18775  , -0.57272  ,  0.24008  , -0.45708  ,  0.24307  ,
        0.13683  , -0.79857  ,  0.074791 , -0.12394  , -0.56854  ,
        0.022754 ,  0.3714   ,  0.26642  , -0.34136  ,  0.52184  ,
       -0.60285  ,  0.096595 , -0.58149  , -0.1976   , -0.32817  ,
       -0.61237  ,  0.25336  , -0.4741   , -0.26123  , -0.4441   ,
       -0.56245  , -0.28803  , -0.26443  , -0.55498  , -0.14627  ,
       -0.33944  , -0.5014   ,  0.31341  ,  0.59401  ,  0.44979  ,
       -0.28401  , -0.42987  ,  0.15481  , -0.92661  ,  0.33816  ,
       -0.09759  ,  0.42582  , -0.25295  ,  0.23603  ,  0.096068 ,
        0.73092  , -0.24118  ,  0.29538  , -0.20131  , -0.011356 ,
        0.17262  ,  0.48552  , -0.89602  , -0.15975  , -1.1268   ,
       -0.35901  , -0.27292  , -0.41855  ,  0.14515  ,  0.25579  ,
        0.35486  ,  0.45005  , -0.29608  ,  0.2174   , -0.88678  ,
        0.15311  , -0.17012  ,  0.45579  ,  0.56337  ,  0.35693  ,
       -1.3714   ,  0.018665 ,  0.29932  ,  0.40097  , -0.0057661,
       -0.96862  ,  0.92776  , -0.38294  , -0.59914  , -0.18379  ,
        0.22438  ,  0.35672  ,  0.41689  ,  0.29414  , -0.68988  ,
        0.047933 , -0.36199  ,  0.55094  ,  0.69914  ,  0.44573  ,
        0.17708  , -0.56528  , -0.39674  ,  0.93703  , -0.62675  ,
        0.15879  , -0.40349  ,  0.71094  , -0.40172  , -0.35552  ,
       -0.12311  ,  0.35649  , -0.080576 ,  0.09634  ,  0.094864 ,
       -1.3113   ,  0.3083   ,  0.22291  , -0.14055  , -0.15155  ,
       -0.1068   ,  0.60984  , -0.20204  , -0.75154  , -0.095729 ,
        0.028883 , -0.42239  ,  0.33544  , -0.28398  , -0.061679 ,
        0.38702  , -0.64217  , -0.37509  , -0.74628  ,  0.26497  ,
        0.42881  ,  0.049184 , -0.17532  ,  0.012862 , -0.043153 ,
        0.1292   , -0.67756  , -0.31886  , -0.37691  ,  0.17448  ,
       -0.20104  ,  0.91136  ,  0.89131  , -0.71439  , -0.012486 ,
       -0.060799 ,  0.0055993,  0.086543 , -0.19609  ,  0.0078492,
        0.40995  ,  0.11576  , -0.087443 ,  0.22963  ,  0.37033  ,
       -0.23995  , -0.44195  ,  0.19666  ,  1.1471   ,  0.062089 ,
       -0.41113  , -0.15976  ,  0.38868  ,  0.35393  , -0.34448  ,
        0.3485   ,  0.31501  , -0.38767  ,  1.0658   , -0.43497  ,
       -0.032901 ,  0.35149  ,  0.19256  , -0.75722  ,  0.17941  ,
       -0.33405  ,  0.76507  ,  0.36167  , -0.067552 ,  0.097109 ,
       -0.37818  ,  0.32683  , -0.010067 , -0.91336  ,  0.17552  ,
       -1.6135   , -0.59126  , -0.39041  , -0.35368  ,  0.19661  ,
        0.23019  , -0.68017  ,  0.02376  , -0.40125  , -0.45278  ,
        0.57748  , -0.077932 , -0.25077  , -0.5414   , -0.052836 ,
        0.75144  ,  0.045823 , -0.34114  ,  0.11529  , -0.80401  ,
        0.30065  ,  0.66715  ,  0.50323  ,  0.06739  , -0.71001  ,
       -0.086023 ,  0.48423  , -0.18536  , -0.57126  ,  0.20215  ,
       -0.50901  ,  0.27116  , -0.32003  , -0.34564  , -0.095792 ,
        0.11585  ,  0.12502  ,  0.072528 ,  0.10907  ,  0.88684  ,
       -0.014104 ,  0.47286  ,  0.25772  , -0.63818  , -0.34693  ,
        0.70439  , -0.6604   ,  0.15455  , -0.12037  ,  0.065951 ,
       -0.22729  ,  0.24867  ,  0.27362  ,  0.51367  ,  0.2704   ,
        0.091387 , -0.14143  , -0.2877   ,  1.3471   , -0.10408  ,
        0.27348  , -0.3991   , -0.3559   , -0.32319  , -0.07403  ,
        0.33321  , -0.37094  , -0.089775 , -0.18678  , -0.16748  ,
       -0.04625  ,  0.075674 ,  0.22547  , -0.019778 , -0.10191  ,
        0.65159  , -0.73702  , -0.80819  , -0.20682  ,  0.99883  ,
        0.30223  , -0.1516   ,  0.5578   ,  0.11049  ,  0.1554   ],
      dtype=float32)
In [9]:
from scipy.spatial.distance import cosine

print(cosine(embedding_index['english'], embedding_index['chinese']), cosine(embedding_index['english'], embedding_index['atom']))
0.40422719717025757 0.8550852090120316
In [10]:
import operator

def find_similar_word(target):
    dic = dict()
    if type(target) == str: target = embedding_matrix[target]
    for word in embedding_matrix.keys():
        dic[word] = cosine(target, embedding_matrix[word])
    return sorted(dic.items(), key=operator.itemgetter(1))
In [11]:
find_similar_word("female nudes")[:10] # well it is similar to virgin mary. The Russians love female nudes too!
Out[11]:
[('female nudes', 0.0),
 ('male nudes', 0.02565499041293351),
 ('girls', 0.3377243583779712),
 ('women', 0.3628658927872703),
 ('russian', 0.45715669441769413),
 ('european bronze age', 0.4584656321440683),
 ('actresses', 0.4858222857491613),
 ('virgin mary', 0.4996840309487608),
 ('men', 0.5072803030274566),
 ('couples', 0.5077622915840606)]
In [12]:
print("Russian to Nudes: {}, Chinese to Nudes: {}".format(cosine(embedding_index["russian"], embedding_index["nudes"]), cosine(embedding_index["chinses"], embedding_index["nudes"])))
Russian to Nudes: 0.47605347633361816, Chinese to Nudes: 0.8742377907037735
In [13]:
find_similar_word("european bronze age")
Out[13]:
[('european bronze age', 0.0),
 ('american or european', 0.3405124024722851),
 ('european', 0.3413597551901011),
 ('chinese with european decoration', 0.35515734170302427),
 ('western european', 0.37797452429531764),
 ('northern european', 0.3973158135981917),
 ('central european', 0.40482240050618135),
 ('eastern european', 0.4052604553125496),
 ('for european market', 0.4214208451677425),
 ('after british', 0.4219498602022408),
 ('american', 0.43599795485782733),
 ('after russian original', 0.43958116777010425),
 ('colonial american', 0.4404927731393651),
 ('chinese with french mounts', 0.44637080897784176),
 ('chinese with dutch decoration', 0.4507231307772265),
 ('british or french', 0.4509546210033871),
 ('female nudes', 0.4584656321440683),
 ('after german', 0.459609127766223),
 ('after italian', 0.4605255772788649),
 ('male nudes', 0.462419701879329),
 ('for american market', 0.46339644027615523),
 ('after german original', 0.4667706542459734),
 ('for british market', 0.47378978552638007),
 ('british', 0.4773645549246027),
 ('world war i', 0.48227869575732896),
 ('roman egyptian', 0.48435783270853205),
 ('for russian market', 0.4859487803568453),
 ('greek figures', 0.49103715261784553),
 ('eastern mediterranean or italian', 0.49236717970403654),
 ('northern italian', 0.49448510940507484),
 ('old testament', 0.4953395643427183),
 ('french or german', 0.49575644024344845),
 ('greek or roman', 0.4964969117392508),
 ('central italian', 0.4974973041589281),
 ('ancient greek', 0.5032169551454728),
 ('roman', 0.5046831690025694),
 ('british or scottish', 0.5047430688157764),
 ('southern german', 0.5076783991575842),
 ('french or italian', 0.5088467460090597),
 ('northwest china/eastern central asia', 0.5090626624459191),
 ('for french market', 0.5094655465457505),
 ('south german', 0.51047842113062),
 ('north china', 0.5177428716190966),
 ('north italian', 0.5191609354753457),
 ('russian', 0.5211179595693742),
 ('women', 0.5231542031229018),
 ('south italian', 0.5262762974037641),
 ('asia minor', 0.527970386366666),
 ('east greek', 0.5300589108595231),
 ('china', 0.5333895788696191),
 ('for swedish market', 0.5347566825637644),
 ('england', 0.5356092712863583),
 ('sculpture', 0.5356118391865504),
 ('northwest china', 0.5359060673165539),
 ('german', 0.5360786427067752),
 ('northern india', 0.5367535857788293),
 ('central asia', 0.5386999925755974),
 ('london original', 0.5389693232509096),
 ('french', 0.5391703950810258),
 ('italian or spanish', 0.5414653244467573),
 ('german or swiss', 0.543442914768876),
 ('for danish market', 0.5443153856115437),
 ('czech', 0.5455669908561114),
 ('french or swiss', 0.5461689055412479),
 ('descent from the cross', 0.5466249907622368),
 ('italian', 0.5475456851832905),
 ('girls', 0.5478439985612573),
 ('chinese', 0.5480587109793107),
 ('saint mark', 0.5499314987076924),
 ('buddhist religious figures', 0.5536754845211637),
 ('united states', 0.5578955667859726),
 ('north indian', 0.5580960166584211),
 ('turkish or venice', 0.5596435214736727),
 ('roman deities', 0.5599083637215252),
 ('men', 0.5603626408975524),
 ('japan', 0.5614459314724617),
 ('old assyrian trading colony', 0.561527082492184),
 ('decorative elements', 0.5632318568535088),
 ('still life', 0.5637984756271899),
 ('coat of arms', 0.5664994996128696),
 ('decorative designs', 0.5666646922962294),
 ('military clothing', 0.5677701805740759),
 ('madonna and child', 0.5689924441020968),
 ('coins', 0.5704711971305961),
 ('hindu religious figures', 0.5730444273758941),
 ('virgin mary', 0.574990286628853),
 ('human figures', 0.5752009710209156),
 ('alexander the great', 0.576322313517535),
 ('eastern mediterranean', 0.5765917159184633),
 ('greek islands', 0.5786648560934866),
 ('historical figures', 0.5804229826063432),
 ('jewelry', 0.5821402554625991),
 ('greek', 0.5825372337488999),
 ('irish', 0.5831037249836201),
 ('turkish', 0.5844235549098336),
 ('egyptian', 0.5851532460895863),
 ('boys', 0.5853671405014647),
 ('swedish', 0.5863469706317037),
 ('saint george', 0.5910012494578527),
 ('assumption of the virgin', 0.5910183359708956),
 ('for continental market', 0.5926341701860124),
 ('polish', 0.5932641158297871),
 ('painting', 0.5949962235852304),
 ('austrian', 0.5980522267087761),
 ('saint michael', 0.5986740883326616),
 ('cross', 0.5991411170810881),
 ('saint peter', 0.600975483788899),
 ('flemish or italian', 0.6033286418853041),
 ('horse riding', 0.6051717598331026),
 ('rome', 0.6066808463787909),
 ('spanish', 0.6071822180719795),
 ('dutch', 0.6095896627622557),
 ('danish', 0.6095995428575914),
 ('greek deities', 0.610582335376732),
 ('architectural elements', 0.6111178022739403),
 ('west slavic', 0.6115175119988905),
 ('mexican', 0.6117412708529464),
 ('scottish', 0.6126098588127855),
 ('london', 0.613447507571319),
 ('napoleon i', 0.6138006093283801),
 ('chess', 0.6140191485566859),
 ('george washington', 0.6145673969432818),
 ('new testament', 0.6148402263772256),
 ('furniture', 0.6156040186356608),
 ('caucasian', 0.6187669836622642),
 ('saint paul', 0.6189129619176492),
 ('paris', 0.6198437545591111),
 ('saint john the baptist', 0.6202443186376031),
 ('egypt', 0.6205065848716769),
 ('for portuguese market', 0.6218768288935597),
 ('religious events', 0.6231714459608059),
 ('children', 0.6232302329931471),
 ('scandinavian', 0.6240610962635669),
 ('india', 0.6244565839930003),
 ('rings', 0.6250398604170952),
 ('trophies', 0.6256557451056708),
 ('living rooms', 0.6271052370392025),
 ('saint anthony', 0.6278735413127148),
 ('body parts', 0.6283819250413774),
 ('colonial', 0.6290953207752598),
 ('holy family', 0.6298343110554405),
 ('ornament', 0.6321081024709934),
 ('christmas', 0.6347779894550416),
 ('monuments', 0.6351766431322978),
 ('vase fragments', 0.6367980665789319),
 ('mothers', 0.6368225559917202),
 ('saint-cloud', 0.6377419134695552),
 ('hungary', 0.6390297117140473),
 ('sun', 0.6390373268004785),
 ('christian imagery', 0.6393648524816878),
 ('hair', 0.6394639890300463),
 ('civil war', 0.6403038581046788),
 ('seating furniture', 0.6411523985285414),
 ('bears', 0.6415221783187215),
 ('design elements', 0.641690576334073),
 ('korea', 0.6425060143504442),
 ('architectural fragments', 0.643230401700531),
 ('saint anne', 0.6437308810801472),
 ('bodies of water', 0.6442971289843791),
 ('athletes', 0.6454107784940839),
 ('goddess', 0.6476437531550645),
 ('saint joseph', 0.6478677651041801),
 ('faces', 0.6481339330298408),
 ('eyes', 0.6492268635209024),
 ('celestial bodies', 0.6498743318231541),
 ('navy', 0.6513385951517561),
 ('clothing and accessories', 0.6518000065575458),
 ('drinking glasses', 0.6527694203233112),
 ('death', 0.6530576786895713),
 ('saint matthew', 0.6534495701952181),
 ('daily life', 0.6550170688034826),
 ('military equipment', 0.6550170863906066),
 ('playing cards', 0.6554632286974995),
 ('saint catherine', 0.6554837913011209),
 ('autumn', 0.6561963118973131),
 ('pendants', 0.6566236634946883),
 ('swords', 0.657258357728623),
 ('olive trees', 0.6576990209802898),
 ('dancing', 0.6578328353020266),
 ('literature', 0.6578867201441527),
 ('hungarian', 0.6581962355226568),
 ('earrings', 0.6584112193907918),
 ('vases', 0.6588510284366289),
 ('horses', 0.6596551684599405),
 ('military', 0.6608434245529853),
 ('dance', 0.6608928107319242),
 ('beads', 0.6610965326581777),
 ('saint john the evangelist', 0.6614557757246),
 ('geometric patterns', 0.6614946887167149),
 ('arabian', 0.6616918081996763),
 ('saint francis', 0.6620282219629506),
 ('north netherlandish', 0.6621236478240679),
 ('norwegian', 0.6621807570853975),
 ('bracelets', 0.6633953293299031),
 ('sword guards', 0.6651797856005675),
 ('portraits', 0.6652044381974527),
 ('meissen with german', 0.6652096997402384),
 ('south netherlandish', 0.6655933791749331),
 ('bathing', 0.6668318406137763),
 ('saint lawrence', 0.667723221750564),
 ('lamps', 0.6682917208420678),
 ('italian or sicilian', 0.6687830082745807),
 ('unknown', 0.6692816778357905),
 ('dawn', 0.6705099425382401),
 ('christ', 0.6711796697061487),
 ('religious texts', 0.6713209436632187),
 ('berlin', 0.6716204251340063),
 ('gods', 0.6719700805345767),
 ('couples', 0.6725114793754614),
 ('iran', 0.6725769387644146),
 ('costumes', 0.6726677452637466),
 ('sports', 0.6726682352049855),
 ('boxing', 0.6727012249171169),
 ('summer', 0.6740188612400378),
 ('swiss', 0.6741114789535738),
 ('buddha', 0.6748032882291924),
 ('riding', 0.6749728667175919),
 ('storage furniture', 0.6757062043781723),
 ('bow', 0.6763702894842322),
 ('dolls', 0.6770606269066974),
 ('weights and measures', 0.6777569487859192),
 ('musical instruments', 0.6784063265102244),
 ('dragons', 0.6786911022958717),
 ('egg and dart', 0.678809634522793),
 ('east greek/sardis', 0.6789931489676606),
 ('wine', 0.6806577769734857),
 ('pocket watches', 0.6814671593588595),
 ('emperor augustus', 0.6815973855474442),
 ('army', 0.6816069572992873),
 ('street scene', 0.6820567833554585),
 ('spears', 0.6820994088118583),
 ('chelsea-derby', 0.68260430073785),
 ('winter', 0.6827154237702109),
 ('thailand', 0.6831266508920171),
 ('aztec', 0.6831645379628535),
 ('globes', 0.6838022175333399),
 ('moon', 0.6848615628161355),
 ('necklaces', 0.6850324666725789),
 ('heads', 0.6852643526319474),
 ('david', 0.6858255463210919),
 ('architecture', 0.6858363875902819),
 ('eve', 0.6858922651446238),
 ('tibetan', 0.6860037603296989),
 ('jesus', 0.6860723584613979),
 ('fathers', 0.6868275886263198),
 ('fish', 0.6875730457806468),
 ('arabic', 0.6878783083367912),
 ('indian or nepalese', 0.6887425513827762),
 ('hands', 0.6893352181521124),
 ('clocks', 0.689376288358766),
 ('mercury', 0.6898687685862384),
 ('diamonds', 0.6899278932470009),
 ('wars', 0.6913289117200272),
 ('kings', 0.6914305099834104),
 ('saint jerome', 0.6917961279464195),
 ('saints', 0.6922907816191561),
 ('angels', 0.692317436500856),
 ('spring', 0.6930476781587155),
 ('mythology', 0.6934534291960979),
 ('tea drinking', 0.6936332726343215),
 ('playing', 0.6941101582450199),
 ('christianity', 0.6955858522287699),
 ('dancers', 0.6955939882926195),
 ('sky', 0.6957357931773052),
 ('sailors', 0.695843666633628),
 ('infants', 0.696331177040405),
 ('smoking', 0.6963810750784245),
 ('genre scene', 0.6966369590774009),
 ('easter', 0.696696644891176),
 ('textile fragments', 0.6970124907216348),
 ('love', 0.6972665744742044),
 ('inscriptions', 0.6972892018930613),
 ('bow and arrow', 0.6974096054220854),
 ('burial grounds', 0.6974704466447804),
 ('shoes', 0.697586889977772),
 ('urns', 0.6980896193346372),
 ('queens', 0.698186677191301),
 ('lions', 0.6986747569386605),
 ('baptism of christ', 0.6987222819684018),
 ('drinking', 0.6988772188763975),
 ('drawing', 0.6990354210753518),
 ('chests', 0.699146482845802),
 ('animals', 0.6992481109792139),
 ('mythical creatures', 0.6996091556313802),
 ('lighting', 0.7001544357166771),
 ('last judgement', 0.7003072488714062),
 ('games', 0.70106631441846),
 ('for iberian market', 0.7016124744688218),
 ('belgian', 0.7016710582792942),
 ('vienna', 0.7023408277502221),
 ('islamic', 0.7025241499628436),
 ('provincial', 0.7025664320668514),
 ('bowls', 0.7027165494716072),
 ('textiles', 0.7036091552988128),
 ('utilitarian objects', 0.7040321905640612),
 ('tapestries', 0.7040441706891987),
 ('fountains', 0.7041177811701049),
 ('working', 0.7048608233283584),
 ('artists', 0.7051128419392811),
 ('suits', 0.7054054686114488),
 ('emblems', 0.7055353878285029),
 ('watches', 0.7061389984935758),
 ('family', 0.7062115358070092),
 ('neolithic', 0.7064607605972529),
 ('eagles', 0.7064839695909959),
 ('capitals', 0.7066511137908598),
 ('liturgical objects', 0.7067126548640236),
 ('cities', 0.7070017656009266),
 ('circles', 0.707354973465411),
 ('venice', 0.7073862431888902),
 ('bamboo', 0.7075405865911594),
 ('medallions', 0.70966721148575),
 ('dresses', 0.7096753839391274),
 ('toys', 0.7099915505870127),
 ('skulls', 0.7104755616449148),
 ('pigs', 0.7105897821793081),
 ('coats', 0.7106267015738821),
 ('funerary objects', 0.7106952369898365),
 ('opera', 0.710802762931743),
 ('buckles', 0.7112743119284827),
 ('peruvian', 0.7112773535741257),
 ('arches', 0.71134383731386),
 ('chairs', 0.7114144094584076),
 ('dogs', 0.7115336556658205),
 ('mark antony', 0.7118976065315561),
 ('byzantine', 0.7122482343569438),
 ('roses', 0.7130214789955516),
 ('hunting', 0.7131418602339433),
 ('lace', 0.7135663260539759),
 ('bible', 0.7144455709843641),
 ('lovers', 0.7147620572811975),
 ('bordeaux', 0.7151253019782431),
 ('colombian', 0.7153254954151648),
 ('central highlands', 0.7153930719386863),
 ('doors', 0.7154229867177198),
 ('birds', 0.7173564515751166),
 ('baseball', 0.717564633503583),
 ('last supper', 0.7176437409307962),
 ('benjamin franklin', 0.7181459070019678),
 ('feathers', 0.7187114052574308),
 ('music', 0.7190046441787272),
 ('costa rica or panama', 0.7198539724959601),
 ('belts', 0.7199404917093176),
 ('temples', 0.720055674855435),
 ('candelabra', 0.7200656285623293),
 ('portuguese', 0.7202243938894566),
 ('cupid', 0.7203600663078764),
 ('fishing', 0.7205274076548891),
 ('tibet', 0.7209921758749416),
 ('helmets', 0.7210351363199042),
 ('munich', 0.7213565476237583),
 ('philippine', 0.7216393765867086),
 ('zodiac', 0.7224072052421566),
 ('adam', 0.7226646890335657),
 ('writing systems', 0.722993609770251),
 ('theatre', 0.7230495551621603),
 ('boots', 0.7235357902503775),
 ('diana', 0.7235538609771541),
 ('shields', 0.7241768181854518),
 ('st. petersburg', 0.7244364862902501),
 ('landscapes', 0.7248990500941339),
 ('pins', 0.7250169797628855),
 ('derby', 0.7251046693054484),
 ('buildings and structures', 0.7251095149290949),
 ('amsterdam', 0.7254509854594398),
 ('soldiers', 0.7254929156575152),
 ('hell', 0.7259312743143198),
 ('deer', 0.7265354166029405),
 ('europa', 0.7265929707771592),
 ('cats', 0.7268436343280738),
 ('chelsea', 0.7271313976557201),
 ('sheep', 0.7273417714008412),
 ('buildings', 0.7274602859745738),
 ('snow', 0.7275791317799578),
 ('scrolls', 0.7279555095872601),
 ('food', 0.7281163679599375),
 ('venus', 0.7281859218136981),
 ('gates', 0.7286655063253833),
 ('hilt', 0.7291478503037649),
 ('rowing', 0.7292353780248629),
 ('dining', 0.7312108410286371),
 ('weapons', 0.7313974834781224),
 ('uniforms', 0.7317068297455909),
 ('indonesia', 0.7318055259479617),
 ('buddhism', 0.7318590163740497),
 ('masks', 0.7326445301809263),
 ('orleans', 0.7327512727076624),
 ('interiors', 0.7331958144114461),
 ('mirrors', 0.7333724317566528),
 ('brooches', 0.7334760096095039),
 ('moroccan', 0.7337089801020479),
 ('stripes', 0.7340735518511673),
 ('costa rica', 0.7342257892932051),
 ('boats', 0.7342940827994),
 ('vietnam', 0.7345710403120909),
 ('actresses', 0.7348177173636654),
 ('curtains', 0.7349720057409905),
 ('fruit', 0.7350936039656331),
 ('candles', 0.7352452921113677),
 ('pyramids', 0.7352829437565385),
 ('victory', 0.7370426217938346),
 ('leaves', 0.737437487248028),
 ('augsburg decoration', 0.7381439869758),
 ('circus', 0.7385407667528501),
 ('liverpool', 0.7388418855669419),
 ('percussion instruments', 0.738951822849742),
 ('italic-native', 0.7390198554264363),
 ('avon', 0.7394148277111865),
 ('birmingham', 0.7397252174020265),
 ('singers', 0.7397297650007388),
 ('ruins', 0.7399512279100544),
 ('milan', 0.7401152751063926),
 ('poetry', 0.7403917360846677),
 ('atlantic watershed', 0.7406205481704196),
 ('walking', 0.74101562927785),
 ('florence', 0.7411788959135273),
 ('trees', 0.741413590396061),
 ('horns', 0.7420471099123389),
 ('bohemian', 0.7420815507485772),
 ('vines', 0.7432781660528363),
 ('suffering', 0.7440938975891641),
 ('houses', 0.7441409998364426),
 ('nativity', 0.7441855123101191),
 ('castles', 0.7443374469930052),
 ('mountains', 0.7451994619157218),
 ('actors', 0.7454344489565028),
 ('cabinets', 0.7456844099339084),
 ('ceremony', 0.746133641542795),
 ('madrid', 0.7461544350442604),
 ('feet', 0.746362951158047),
 ('carpets and rugs', 0.7463926482352317),
 ('occupations', 0.7467741053866188),
 ('nymphs', 0.7472825941293171),
 ('tea caddy', 0.7478426548691501),
 ('seals', 0.7480754584116922),
 ('tables', 0.7481878017271171),
 ('stars', 0.7482195789208888),
 ('flags', 0.7484181729082442),
 ('louis xiv', 0.7484573668672776),
 ('dublin', 0.7486374564107219),
 ('flowers', 0.7490094068214779),
 ('pakistan', 0.749148840808477),
 ('law', 0.7494321832181159),
 ('towers', 0.7499387777136818),
 ('embroidery', 0.750107024150117),
 ('schools', 0.7502457557002884),
 ('sheffield', 0.7511997459068899),
 ('profiles', 0.7517953779312411),
 ('bulls', 0.7519312769706045),
 ('samples', 0.7519833275092069),
 ('bottles', 0.7520087020337813),
 ('skirts', 0.7522184783334862),
 ('inca', 0.7523327589456132),
 ('illness', 0.7528209258815383),
 ('tombs', 0.7528293195139034),
 ('stockholm', 0.7535258450922662),
 ('hills', 0.7541774046699007),
 ('fireplaces', 0.754813294828567),
 ('poets', 0.7549274575569282),
 ('bavaria', 0.755073630075849),
 ('boxes', 0.7552233510096337),
 ('cutlery', 0.755249914079566),
 ('dishes', 0.7557737549378303),
 ('elephants', 0.7557893658164481),
 ('stairs', 0.7564705186683016),
 ('beaches', 0.7573563208039407),
 ('monkeys', 0.7574646891997241),
 ('kathmandu valley', 0.7576280099535644),
 ('tools and equipment', 0.7580429317681545),
 ('waves', 0.7582860455592177),
 ('footwear', 0.7583309463930409),
 ('battles', 0.7585490661805582),
 ('daggers', 0.7585835207883651),
 ('jugs', 0.7585835712884128),
 ('knives', 0.7590245397925708),
 ('fire', 0.7591598473590746),
 ('fear', 0.7592705363357253),
 ('shirts', 0.7593826277526505),
 ('windows', 0.7595394444031701),
 ('palaces', 0.759731024166994),
 ('badges', 0.7598293350614433),
 ('guitars', 0.7598792710975042),
 ('candlesticks', 0.760224494533338),
 ('rabbits', 0.7602762923859608),
 ('washing', 0.7603785167954854),
 ('jason', 0.76110872682259),
 ('churches', 0.7612237173078924),
 ('books', 0.7613786503283756),
 ('hats', 0.7615433920357906),
 ('musicians', 0.7620978402523892),
 ('cleopatra', 0.7621618900397813),
 ('skeletons', 0.762293050766315),
 ('lakes', 0.7625139565967313),
 ('bristol', 0.7625392329460412),
 ('grapes', 0.7627723910892807),
 ('seas', 0.7630703526679815),
 ('bicycles', 0.7634562030781313),
 ('gardens', 0.763853504982311),
 ('punishment', 0.7638791250479001),
 ('princesses', 0.7641442088482933),
 ('gingham pattern', 0.76740892287967),
 ('students', 0.767410232781406),
 ('shakespeare', 0.767691503312822),
 ('prostitutes', 0.7679717862961235),
 ('markets', 0.7679871872979475),
 ('monsters', 0.7679989862896058),
 ('sphinx', 0.7680788172490478),
 ('nike', 0.7683380395591588),
 ('ducks', 0.768436329972097),
 ('nonrepresentational art', 0.7684544326451143),
 ('devil', 0.769271451096841),
 ('princes', 0.7694499520972672),
 ('apollo', 0.7700023539886593),
 ('dolphins', 0.770240094212777),
 ('evening', 0.7712623500752167),
 ('cups', 0.7714258564551688),
 ('naples', 0.771595475538424),
 ('columns', 0.7716885602783907),
 ('cathedrals', 0.771847033915676),
 ('goats', 0.7720608084420115),
 ('inuit', 0.772329527321505),
 ('maps', 0.7725685212270069),
 ('caves', 0.7726186379668177),
 ('beds', 0.772817786541956),
 ('slavery', 0.7728846085731327),
 ('shells', 0.7732703041207123),
 ('bridges', 0.7734035851307083),
 ('pants', 0.774361765638258),
 ('tablets', 0.7746274464666919),
 ('facades', 0.7746534521236503),
 ('pianos', 0.7749879438887964),
 ('jackets', 0.7752537855670749),
 ('deities', 0.7758165229604397),
 ('fairies', 0.7758603818576186),
 ('doctors', 0.7763133151588357),
 ('cypriot or phoenician', 0.7766278704704849),
 ('dice', 0.7766662144110966),
 ('apples', 0.7770465960419688),
 ('towns', 0.7770601403470629),
 ('teachers', 0.7771945769322545),
 ('drums', 0.7773217602300838),
 ('tigers', 0.7773358035839028),
 ('gloves', 0.7780240391542182),
 ('geneva', 0.7782691029877249),
 ('fortification', 0.7785554592508507),
 ('aegean', 0.7789849094912924),
 ('writing implements', 0.7791459714214533),
 ('firearms', 0.7800416572187034),
 ('villages', 0.7803492841577445),
 ('insignia', 0.7803991033169331),
 ('night', 0.7807014370100058),
 ('maya', 0.780818977048979),
 ('streets', 0.7809487720965465),
 ('merchants', 0.7809644078704903),
 ('air transports', 0.7809825436341691),
 ('afghan', 0.7811917417799608),
 ('architects', 0.781351934668642),
 ('athena', 0.7813527722925002),
 ('benches', 0.7813928552750914),
 ('purses', 0.7818258930898065),
 ('party', 0.782131694067512),
 ('plants', 0.7823054168255851),
 ('writing', 0.7823661466163865),
 ('octopus', 0.7825554143524751),
 ('copenhagen', 0.7829861132447608),
 ('neo-sumerian', 0.783378622738091),
 ('sleep', 0.7835366173148562),
 ('scarves', 0.7837790474532345),
 ('rivers', 0.7838907604024821),
 ('wind', 0.7842633135514281),
 ('generals', 0.7846945568424015),
 ('cows', 0.7847665781110311),
 ('violins', 0.784922288371897),
 ('goblets', 0.7849939432678421),
 ('performance', 0.7851024724394398),
 ('amulets', 0.7859144711231842),
 ('billiards', 0.7861661997885473),
 ('stools', 0.7865693196588007),
 ('burma', 0.786606462988004),
 ('mary magdalene', 0.7868716536958038),
 ('robes', 0.7871989853479305),
 ('rain', 0.787220227185169),
 ('zigzag pattern', 0.7877334997681691),
 ('singing', 0.7878127826032675),
 ('jars', 0.7880553439207694),
 ('brussels', 0.7881568831589639),
 ('flutes', 0.7882163030433726),
 ('cypriot', 0.7905693111384358),
 ('crowd', 0.7906390135234459),
 ('butterflies', 0.7915749896085986),
 ('organs', 0.7917747625559844),
 ('cambodia', 0.7918253135023363),
 ('etruscan', 0.7920199698984334),
 ('parks', 0.79221757307916),
 ('ladders', 0.7923416932557806),
 ('doorways', 0.7923965159588924),
 ('spectators', 0.7934105070119312),
 ('paisley', 0.7940754621126997),
 ('monks', 0.7941553757779517),
 ('astronomy', 0.795379858436414),
 ('transportation', 0.7955048390704549),
 ('manuscripts', 0.7955579096253582),
 ('pigeons', 0.7957561746985831),
 ('arrowheads', 0.7957585811546342),
 ('concerts', 0.7962025899345232),
 ('edinburgh', 0.7962787630690096),
 ('polka-dot pattern', 0.7965514901664933),
 ('armors', 0.7967807768798811),
 ('balinese', 0.7968631051705836),
 ('rams', 0.7974298986689838),
 ('limoges', 0.7975374935161872),
 ('wreaths', 0.7976170353765402),
 ('julius caesar', 0.7977804395254066),
 ('hermes', 0.79814482057064),
 ('railways', 0.7982541776797963),
 ('ships', 0.7983545137139353),
 ('verona', 0.7989587102912064),
 ('zeus', 0.7991242708417485),
 ('psyche', 0.7992017952939032),
 ('shepherds', 0.799426166915936),
 ('zurich', 0.7996334601715941),
 ('forests', 0.799707139867342),
 ('keys', 0.7998041610417067),
 ('kitchens', 0.8004610729250282),
 ('popes', 0.8013525482305407),
 ('arrows', 0.8017719946379332),
 ('desks', 0.8019901295815599),
 ('horus', 0.8023427223874764),
 ('trains', 0.8024980626953332),
 ('crete', 0.8026684331314479),
 ('versailles', 0.8034710543359549),
 ('incense burners', 0.8035997142506972),
 ('nepal', 0.8038282512532466),
 ('surrey', 0.8041700489582329),
 ('chariots', 0.8043208673256423),
 ('frogs', 0.804515441015589),
 ('domes', 0.8046284532600497),
 ('bakers', 0.8047080351852435),
 ('flemish', 0.8055986571420642),
 ('sarcophagus', 0.8059611173703842),
 ('burials', 0.8060714325849956),
 ('boars', 0.806398640096549),
 ('bees', 0.8065901013962871),
 ('vegetables', 0.8071154155741611),
 ('trumpets', 0.8071544862456663),
 ('crucifixion', 0.8076000553919642),
 ('catalan', 0.8079790973209746),
 ('factories', 0.8080630764155938),
 ('abstraction', 0.8083600282517198),
 ('squares', 0.8087107184640892),
 ('clouds', 0.808759124045632),
 ('la rochelle', 0.8091479493494528),
 ('chickens', 0.8092129486769057),
 ('staffordshire', 0.809231642974602),
 ('snakes', 0.8108813151981475),
 ('augsburg original', 0.8109816874165833),
 ('couches', 0.8109964908744975),
 ('swans', 0.8118447944112253),
 ('hinduism', 0.8118659723925735),
 ('farmers', 0.8118903358851622),
 ('netherlandish', 0.8131036427099202),
 ('cannons', 0.8135921520106291),
 ('neptune', 0.8141425184918604),
 ('moses', 0.8147660122443909),
 ('wells', 0.8149540932062619),
 ('servants', 0.8150615936915737),
 ('carriages', 0.8154801545527506),
 ('hercules', 0.8159238922052662),
 ('fans', 0.8170312234664088),
 ('roads', 0.8172195579177028),
 ('demons', 0.8176543400327595),
 ('archangel gabriel', 0.8181822480153402),
 ('journals', 0.8184082973641773),
 ('bats', 0.8186201899322989),
 ('lambs', 0.8189315944394905),
 ('basins', 0.8192655033105138),
 ('ponds', 0.8196978805965759),
 ('bedrooms', 0.820751043929209),
 ('donkeys', 0.8207756657772729),
 ('sunflowers', 0.8208803988135198),
 ('screens', 0.8212949725487524),
 ('syrian', 0.8213487674614651),
 ('macedonian', 0.8213632215222063),
 ('barns', 0.8214776857082403),
 ('camels', 0.8216558639301774),
 ('worcester', 0.8218830726584162),
 ('farms', 0.822066256146323),
 ('cranes', 0.8225475693380099),
 ('corsets', 0.8230689743636617),
 ('axes', 0.823149769561911),
 ('turtles', 0.8234815408268498),
 ('dresden', 0.824771007073121),
 ('tulips', 0.8254545925248081),
 ('prisoners', 0.8256633004801188),
 ('peaches', 0.8261334649318739),
 ('rosaries', 0.8267198677425769),
 ('acanthus', 0.8268512721639947),
 ('fireworks', 0.8270324962691196),
 ('vestments', 0.8272235561614107),
 ('kazakhstan', 0.8273262249886391),
 ('calendars', 0.8275250572874662),
 ('jupiter', 0.8282249585764775),
 ('balconies', 0.8283431484953083),
 ('apocalypse', 0.8287948605626718),
 ('reading', 0.8294089549945749),
 ('coromandel coast', 0.8298622182609146),
 ('doves', 0.8300773175631638),
 ('valencia', 0.8300855207805381),
 ('altars', 0.8303126639026677),
 ('lockets', 0.8303368280506296),
 ('juno', 0.8305828381355577),
 ('mice', 0.831545749416922),
 ('adoration of the magi', 0.8318812472952608),
 ('mars', 0.8325413688066073),
 ('isis', 0.8329837164075949),
 ('worshiping', 0.833774215385332),
 ('pitchers', 0.8338938699597116),
 ('brunswick', 0.8344075266920603),
 ('hammers', 0.8346496848763374),
 ('coffins', 0.8346955213230061),
 ('putti', 0.8353403375945472),
 ('teapots', 0.8355621401601722),
 ('landforms', 0.8356626907046371),
 ('trays', 0.8367663673310892),
 ('algerian', 0.8368063679863313),
 ('apostles', 0.8370848152046744),
 ('insects', 0.8371782282305373),
 ('garlands', 0.8388606243138572),
 ('amazons', 0.8400545093939187),
 ('netsuke', 0.8402906801749943),
 ('acorns', 0.8403102181289765),
 ('buttons', 0.8403494759783638),
 ('attic', 0.8407910477675699),
 ('tunics', 0.8408019624799346),
 ('sri lankan', 0.8411004886893291),
 ('roosters', 0.8417420244133285),
 ('aphrodite', 0.8420523924883284),
 ('saucers', 0.8429285673464773),
 ('waterfalls', 0.8431760528607054),
 ('sadness', 0.8433850185632881),
 ('falcons', 0.8453294595793555),
 ('longton hall', 0.8454612565581348),
 ('snails', 0.8458789342163937),
 ('lizards', 0.8460647461409465),
 ('owls', 0.8463004649602022),
 ('obelisks', 0.846312127207706),
 ('mali', 0.8471583448890028),
 ('antwerp', 0.8477721643039758),
 ('harps', 0.8479444673979499),
 ('mughal', 0.8485233359236959),
 ('bagpipes', 0.8492840660785397),
 ('cornucopia', 0.8494583443908188),
 ('cuneiform', 0.8498862252613573),
 ('deserts', 0.8503563041461797),
 ('acrobats', 0.8507344557479144),
 ('foxes', 0.8510587792064204),
 ('strasbourg', 0.8516687550991364),
 ('siena', 0.8519747452087155),
 ('bologna', 0.8522734992084001),
 ('anger', 0.853022948098849),
 ('corinthian', 0.8533541864293712),
 ('canals', 0.8545491507072837),
 ('coverlets and quilts', 0.8552928803654276),
 ('cradles', 0.856468131499835),
 ('bishops', 0.8564997533982459),
 ('satire', 0.8569874552668673),
 ('seville', 0.8570026753188158),
 ('savoy', 0.8571741450181686),
 ('san sabastian', 0.8573506895724161),
 ("qur'an", 0.8579137593321816),
 ('courtyards', 0.8579648176050533),
 ('turin', 0.8580973602448043),
 ('sicily', 0.858497478752251),
 ('corpses', 0.8589682394040776),
 ('eros', 0.8597437210744159),
 ('babylonian', 0.8609281748982451),
 ('nero', 0.8617885769095119),
 ('hieroglyphs', 0.8618699722251335),
 ('inns', 0.8623417695873806),
 ('crabs', 0.8632726045244016),
 ('anklet', 0.863824333420848),
 ('documents', 0.863838910004763),
 ('friezes', 0.8640021068900865),
 ('windmills', 0.8641419707731182),
 ('poseidon', 0.8643323726978529),
 ('cameos', 0.8649490993387959),
 ('griffins', 0.8659963676518578),
 ('wagons', 0.8674827168260127),
 ('abraham', 0.867565329453066),
 ('volcanoes', 0.8676129525388216),
 ('tents', 0.8678781429268125),
 ('storms', 0.8681336700037631),
 ('telescopes', 0.8684053391494847),
 ('sundials', 0.868915933166184),
 ('parrots', 0.8691405252654181),
 ('the hague', 0.8697615583277858),
 ('minoan', 0.8702196221795863),
 ('triangles', 0.8702482798347079),
 ('vests', 0.8712029106943207),
 ('mosques', 0.8713478671307844),
 ('triton', 0.8719446588391901),
 ('gardeners', 0.8724506094157602),
 ('muses', 0.8740471440768405),
 ('funerals', 0.8749019532742965),
 ('minerva', 0.8750895247597887),
 ('delft', 0.8758410278125692),
 ('cyclades', 0.8758915944092847),
 ('leopards', 0.8759270672480761),
 ('hawks', 0.8759549893825849),
 ('chinoiserie', 0.8763867179586359),
 ('pomegranates', 0.8765170668857666),
 ('adonis', 0.8767944536158405),
 ('contemplation', 0.8778990069439204),
 ('admirals', 0.8781961682359377),
 ('correspondence', 0.8784033320772754),
 ('aragon', 0.8785996705869524),
 ('daisies', 0.8790231002063307),
 ('squirrels', 0.8801842955613988),
 ('kettles', 0.880464932988192),
 ('necktie', 0.8808664647417661),
 ('peacocks', 0.8812144149931874),
 ('judith', 0.8820111850761868),
 ('shiva', 0.8824899901462517),
 ('scorpions', 0.8826635025084695),
 ('seascapes', 0.8827044217467569),
 ('nantes', 0.883957423678123),
 ('mycenaean', 0.8840344154170752),
 ('serpents', 0.8845815672062012),
 ('artemis', 0.8848449833328519),
 ('jockeys', 0.8853530767762767),
 ('taoism', 0.8858037249917006),
 ('crocodiles', 0.8860072157200575),
 ('undergarment', 0.8869998376251078),
 ('chalices', 0.8871093610817339),
 ('chantilly', 0.8883960098144317),
 ('cartouches', 0.8888235586500979),
 ('lobsters', 0.8894182053911497),
 ('sumerian', 0.8901044327939659),
 ('rectangles', 0.8904677479281862),
 ('esther', 0.8905586201504041),
 ('nuremberg', 0.8907533500378866),
 ('prisons', 0.8916304135686661),
 ('catalonia', 0.8937177396940617),
 ('salzburg', 0.894331299348338),
 ('hospitals', 0.8945199728947708),
 ('peonies', 0.8946850309595528),
 ('adoration of the sheperds', 0.8950897597037213),
 ('assyrian', 0.8955971429942142),
 ('vulcan', 0.8971643119599386),
 ('docks', 0.8975330369196357),
 ('javanese', 0.899320333737879),
 ('gaul', 0.9000676135588966),
 ('prisms', 0.9000976777628711),
 ('satyrs', 0.9003296298298442),
 ('magicians', 0.9010853018669205),
 ('lille', 0.9034317259461716),
 ('scarabs', 0.9035434882028716),
 ('palermo', 0.9055946103997702),
 ('genoa', 0.9071741996857299),
 ('pheasants', 0.9083968280374506),
 ('violas', 0.9086255498301148),
 ('coptic', 0.9088551696914694),
 ('italic', 0.9089174838564631),
 ('sevres', 0.9094768321604204),
 ('beakers', 0.9107850419687795),
 ('self-portraits', 0.9114066504530839),
 ('meissen', 0.9120927114062751),
 ('maitreya', 0.9124582553157019),
 ('saxony', 0.9130745899689622),
 ('dionysus', 0.9131148276473968),
 ('cauldrons', 0.9163440968462415),
 ('cycladic', 0.920202328474165),
 ('ewers', 0.920297111794096),
 ('pilasters', 0.9207170129186216),
 ('hilts', 0.9209031080799757),
 ('krishna', 0.9214714985437499),
 ('rouen', 0.9216989524531998),
 ('drunkenness', 0.9219399261617613),
 ('alsace', 0.9220144619986105),
 ('reliquaries', 0.9227870064753869),
 ('buffalos', 0.9238661513116856),
 ('lyons', 0.923998490480219),
 ('chaumont-sur-loire', 0.9265182993782841),
 ('smyrna', 0.9290021801034215),
 ('mandolins', 0.9291859418141367),
 ('bes', 0.9293132570609683),
 ('villeroy', 0.9312495611257625),
 ('centaurs', 0.9315930962801008),
 ('leda', 0.9317379377528405),
 ('ceres', 0.932551667819446),
 ('moro', 0.9344104412177684),
 ('lutes', 0.9344705738360247),
 ('pinecones', 0.9345811817744011),
 ('scythian', 0.9355121713106019),
 ('alexandria-hadra', 0.9361399346506482),
 ('bacchus', 0.9387490773183523),
 ('avignon', 0.9391857186185534),
 ('piedmont', 0.9428130334037477),
 ('huron', 0.9451696011607037),
 ('umbria', 0.9462770653283538),
 ('bodhisattva', 0.9473914367556863),
 ('commodes', 0.9477207209282245),
 ('fauns', 0.9486892829517708),
 ('brescia', 0.948759397765798),
 ('parthian', 0.9497196118095624),
 ('perseus', 0.950393153080634),
 ('cretan', 0.9518265731482344),
 ('caryatids', 0.9523322648661624),
 ('crescents', 0.9536025315556773),
 ('ladles', 0.954772785294571),
 ('ferrara', 0.9548793271212281),
 ('massacres', 0.9558837817082619),
 ('pediments', 0.9577086673584718),
 ('pentecost', 0.9580206433062362),
 ('padua', 0.958266549297634),
 ('macao', 0.9591673422917096),
 ('censers', 0.9594481413100455),
 ('ica', 0.9607497337289413),
 ('tureens', 0.9608505026802686),
 ('strapwork', 0.9659182709236436),
 ('deccan', 0.9662767782484062),
 ('annunciation', 0.9686015949167055),
 ('devonshire', 0.9686826042459229),
 ('castel durante', 0.9687627116271154),
 ('bayreuth', 0.9714978311184019),
 ('fulda', 0.9715020717086523),
 ('leuven', 0.9728169721377823),
 ('canaanite', 0.9728374721290813),
 ('arica', 0.9737761934512522),
 ('diadems', 0.975696705195222),
 ('hittite', 0.9763166180988004),
 ('fluting', 0.9769025306146273),
 ('beauvais', 0.9772251243487095),
 ('augsburg', 0.9778146394459046),
 ('grotesques', 0.979088443170024),
 ('medea', 0.9800278849012948),
 ('frankish', 0.9810281058497423),
 ('nimes', 0.9820261054136443),
 ('veracruz', 0.9824238124359403),
 ('potsdam', 0.9834548433662975),
 ('bobbins', 0.9850037156257101),
 ('naxos', 0.9852077189189652),
 ('furstenberg', 0.9869524091245792),
 ('palmettes', 0.9907452904623987),
 ('vincennes', 0.9914870193634636),
 ('nasca', 0.9920705092447519),
 ('lowestoft', 0.9933043751998766),
 ('spitalfields', 0.9965579550686054),
 ('teotihuacan', 0.99749989179874),
 ('lamentation', 0.9975113191862406),
 ('amun', 0.9976962743752653),
 ('michoacan', 0.999203693714524),
 ('zoroastrian', 1.0002523005158734),
 ('phrygian', 1.0012131642642719),
 ('hexagons', 1.0014217833576464),
 ('etruria', 1.0014537054115489),
 ('gurkha', 1.0017969003179064),
 ('ptolemaic', 1.0025503290408553),
 ('stoke-on-trent', 1.002651621097177),
 ('hathor', 1.0034750835516568),
 ('silesia', 1.0036591431318094),
 ('thuringia', 1.004149969972746),
 ('tabernacles', 1.004354110709238),
 ('southall', 1.004669946749879),
 ('haida', 1.00467107286664),
 ('lydian', 1.0052863081569554),
 ('lotuses', 1.0068787566996031),
 ('lambeth', 1.009812601696393),
 ('durga', 1.0132204188119005),
 ('malayan', 1.0200533603677675),
 ('entombment', 1.0202068094929215),
 ('moche', 1.0210168197114504),
 ('akkadian', 1.0217146131711123),
 ('abbies', 1.0218989581455578),
 ('vishnu', 1.0219464744594053),
 ('bathsheba', 1.022351091016332),
 ('olmec', 1.0249328215641025),
 ('sibyl', 1.0263881889678912),
 ('sulawesi', 1.0278432958272536),
 ('nayarit', 1.0322350155799591),
 ('wurzburg', 1.032420839053399),
 ('burslem', 1.033027538316581),
 ('ariadne', 1.0345967127573774),
 ('nevers', 1.0380752461994753),
 ('thanjavur', 1.0384279106931062),
 ('tricorns', 1.0402728096653397),
 ('jainism', 1.0409235654236473),
 ('tlingit', 1.0415197924290835),
 ('sumatran', 1.0445921974924088),
 ('colima', 1.045361155066096),
 ('hanau', 1.0469912082010664),
 ('thessaly', 1.052529226289946),
 ('moche-wari', 1.056584855105138),
 ('urbino', 1.059661873043267),
 ('octagons', 1.060188855295391),
 ('quechua', 1.0603633175956648),
 ('orpheus', 1.0620672270866784),
 ('silenus', 1.0620985752070715),
 ('wari', 1.0667553142512902),
 ('galatea', 1.0673954413970514),
 ...]
In [14]:
find_similar_word("zoroastrian")
Out[14]:
[('zoroastrian', 0.0),
 ('babylonian', 0.44268571689590497),
 ('canaanite', 0.4787484630189942),
 ('assyrian', 0.5183220144284794),
 ('sumerian', 0.5383639140547541),
 ('javanese', 0.5441512850678976),
 ('hinduism', 0.5487997972599774),
 ('parthian', 0.5592946676961916),
 ('neo-sumerian', 0.5666045566282616),
 ('akkadian', 0.5767607285687351),
 ('cypriot or phoenician', 0.5788225704406497),
 ('sibyl', 0.584317198004562),
 ('taoism', 0.5875933953190249),
 ('ptolemaic', 0.5878650446013078),
 ('balinese', 0.5896001036381728),
 ('frankish', 0.5948383622919559),
 ('hittite', 0.5955164602291769),
 ('dionysus', 0.5994277960386302),
 ('mycenaean', 0.6041906717743145),
 ('jainism', 0.6114615505698704),
 ('quechua', 0.6122848044818319),
 ('alexandria-hadra', 0.6128669572052666),
 ('coptic', 0.6238725581320055),
 ('bacchus', 0.6274264102551815),
 ('macedonian', 0.6278681850713617),
 ('bathsheba', 0.6358245010429233),
 ('buddhism', 0.636806887047441),
 ('christianity', 0.6401398780028069),
 ('cypriot', 0.6521657115636688),
 ('vishnu', 0.6540200778048928),
 ('mughal', 0.6547326837537025),
 ('islamic', 0.6557951330258606),
 ('etruscan', 0.6570237351407666),
 ('hathor', 0.6572494180654407),
 ('egyptian', 0.6605111246964305),
 ('galatea', 0.6607073519191627),
 ('byzantine', 0.6621541742396742),
 ('cretan', 0.6663106056912581),
 ('horus', 0.6670405656378235),
 ('roman deities', 0.6673133675500664),
 ('pentecost', 0.6674087420690857),
 ('algerian', 0.6678039255174648),
 ('gaul', 0.668652989923181),
 ('haida', 0.6691718527614905),
 ('minoan', 0.6722537725567466),
 ('roman egyptian', 0.6726278084117434),
 ('greek deities', 0.6730568655973033),
 ('adoration of the magi', 0.6747887909800723),
 ('ariadne', 0.677744528930904),
 ('sumatran', 0.6798171301858308),
 ('thessaly', 0.6800192847837885),
 ('hieroglyphs', 0.6803152676216334),
 ('syrian', 0.6813597613516968),
 ('nimes', 0.6816102955333432),
 ('cycladic', 0.6842501432646061),
 ('thanjavur', 0.6900282612785935),
 ('tlingit', 0.6911365989718803),
 ('shiva', 0.6928467914541991),
 ('amun', 0.693829967644719),
 ('scarabs', 0.6943850370235944),
 ('zenu', 0.6945833467387906),
 ('rhenish', 0.6957792878751938),
 ('augsburg', 0.6986745079913999),
 ('scandinavian', 0.699344192161873),
 ('maitreya', 0.7010086303742995),
 ('corinthian', 0.7018266353598769),
 ('deities', 0.7021082453942815),
 ('moses', 0.7029685068272326),
 ('silenus', 0.7037176738265494),
 ('malayan', 0.7041894851064618),
 ('colima', 0.7047161235990429),
 ('neolithic', 0.7047497635423666),
 ('netherlandish', 0.7058277883143114),
 ('potsdam', 0.7075739419060649),
 ("qur'an", 0.7088092308679378),
 ('aztec', 0.709124097259717),
 ('moche-wari', 0.7097527194811165),
 ('teotihuacan', 0.7109870316874238),
 ('maenads', 0.7121619421273168),
 ('catalan', 0.7141994651754342),
 ('aphrodite', 0.7145811429182357),
 ('artemis', 0.7150462507386197),
 ('chiriqui', 0.7151895540588278),
 ('scythian', 0.7155516334869778),
 ('tibetan', 0.7162096709292414),
 ('sicily', 0.7183733439302683),
 ('cuneiform', 0.7191496644669637),
 ('caryatids', 0.7224165367331317),
 ('silesia', 0.7228466483170628),
 ('isis', 0.7242771437935794),
 ('cyclades', 0.7245261370884489),
 ('amulets', 0.7252964867613407),
 ('stoke-on-trent', 0.7261839707151678),
 ('serpents', 0.7265551790233454),
 ('iran', 0.7271069603202698),
 ('meissen', 0.7278586767406735),
 ('burslem', 0.7298743722701626),
 ('inuit', 0.7314505579227522),
 ('egypt', 0.7327323673216009),
 ('nasca', 0.7332063690037751),
 ('padua', 0.7343284167400318),
 ('fauns', 0.7362608470089982),
 ('minerva', 0.737242796086363),
 ('greek or roman', 0.7374108848752261),
 ('turin', 0.7377466034738602),
 ('rouen', 0.737797500101454),
 ('grotesques', 0.7391941518695173),
 ('sphinx', 0.7393255698635957),
 ('east greek/sardis', 0.7396190114188816),
 ('durga', 0.7397148951319314),
 ('sulawesi', 0.7406955829769055),
 ('lamentation', 0.7414334344299363),
 ('abruzzi', 0.7421041725505664),
 ('southall', 0.7422188393893159),
 ('censers', 0.7422492367512393),
 ('adonis', 0.7426588579076874),
 ('popes', 0.7434052309909629),
 ('wari', 0.7445584773274062),
 ('burma', 0.7448274687304144),
 ('hanau', 0.744937246606065),
 ('hungary', 0.7451405246621692),
 ('veracruz', 0.7469129073065972),
 ('strapwork', 0.746979935491574),
 ('wurzburg', 0.7469905724732026),
 ('buddha', 0.7470002390007147),
 ('nantes', 0.7472188634405992),
 ('archangel gabriel', 0.7475178377874041),
 ('adoration of the sheperds', 0.7479276905209045),
 ('crete', 0.7479898669027223),
 ('italian or sicilian', 0.7483404148506674),
 ('annunciation', 0.7492483551088327),
 ('ewers', 0.7511562973577459),
 ('norwegian', 0.7516210527544918),
 ('meissen with german', 0.7525114016168498),
 ('west slavic', 0.7533642110771077),
 ('antwerp', 0.7553202162843253),
 ('nayarit', 0.7561526505011416),
 ('obelisks', 0.7565099858951341),
 ('aegean', 0.756849274474151),
 ('perseus', 0.75783968293542),
 ('reliquaries', 0.7584265985163083),
 ('lotuses', 0.7585374553093566),
 ('poseidon', 0.7591237643581998),
 ('lowestoft', 0.7592492256147927),
 ('lille', 0.7599042872118202),
 ('hindu religious figures', 0.7608602024724105),
 ('mythology', 0.7609855091581884),
 ('greek', 0.761237406028612),
 ('bodhisattva', 0.7615304336404374),
 ('spitalfields', 0.7620668879633354),
 ('augsburg decoration', 0.7620874990743327),
 ('avignon', 0.7621701375093551),
 ('gurkha', 0.762425709003894),
 ('satyrs', 0.7634703211503789),
 ('nevers', 0.7635899659931917),
 ('roman', 0.76394055624208),
 ('octagons', 0.7641679544762947),
 ('moroccan', 0.7652573219410732),
 ('nuremberg', 0.7659682790410011),
 ('orpheus', 0.7663946713853624),
 ('ancient greek', 0.7669162423010732),
 ('palmettes', 0.767043613365261),
 ('siena', 0.7672664127122494),
 ('arabian', 0.7677170989296095),
 ('baptism of christ', 0.7682795250372488),
 ('zeus', 0.7684046555488331),
 ('bayreuth', 0.7688375615992566),
 ('mali', 0.7692639644255931),
 ('christ', 0.7702921355561287),
 ('shakespeare', 0.7703450844855455),
 ('bavaria', 0.7708197999071973),
 ('smyrna', 0.7718739840866381),
 ('apostles', 0.7719996306911807),
 ('michoacan', 0.7723125060458264),
 ('kazakhstan', 0.7742668837470268),
 ('olmec', 0.7752842802380981),
 ('austrian', 0.775358185023477),
 ('lyons', 0.7754315466464385),
 ('aragon', 0.7756334903206284),
 ('chalices', 0.7758069623053063),
 ('vincennes', 0.7758319250784386),
 ('hungarian', 0.7761712451612691),
 ('salzburg', 0.7764484737543549),
 ('lambeth', 0.7770316923370562),
 ('devonshire', 0.777186684302577),
 ('fulda', 0.7773064903740797),
 ('palermo', 0.7779486899185464),
 ('indian or nepalese', 0.7799927321406619),
 ('julius caesar', 0.7803117845321794),
 ('griffins', 0.7805113539961104),
 ('buddhist religious figures', 0.7805454512794848),
 ('jupiter', 0.7808830927519826),
 ('peruvian', 0.7818718305247948),
 ('saxony', 0.7833150554280982),
 ('sundials', 0.7851675717117457),
 ('vestments', 0.7860480642012713),
 ('leuven', 0.7867946877634961),
 ('alsace', 0.7868078366337614),
 ('moro', 0.7872862432650203),
 ('cartouches', 0.7883203182735862),
 ('hercules', 0.7889952670399277),
 ('magicians', 0.7891652414055134),
 ('crucifixion', 0.7898322942167968),
 ('arica', 0.7903233486268468),
 ('nepal', 0.7903404867475577),
 ('flemish', 0.7905227351018275),
 ('naples', 0.7912499342102686),
 ('beauvais', 0.79129128489892),
 ('gods', 0.7916725326021765),
 ('apollo', 0.7917131305178609),
 ('danish', 0.7920686372758787),
 ('pesaro', 0.7921238503625669),
 ('altars', 0.7922765692012015),
 ('diadems', 0.7924444131196446),
 ('strasbourg', 0.7937868478679517),
 ('augsburg original', 0.794087449335747),
 ('arabic', 0.7941233836288758),
 ('athena', 0.7942289918194243),
 ('verona', 0.7944315540839019),
 ('chinoiserie', 0.7948056984637648),
 ('stockholm', 0.7953436068663138),
 ('naxos', 0.7957695067600381),
 ('zurich', 0.7958122909340682),
 ('eros', 0.7958263154212335),
 ('neptune', 0.7959791164411146),
 ('jesus', 0.7961028250816748),
 ('the hague', 0.7969082557080374),
 ('sevres', 0.7969255618626654),
 ('thuringia', 0.7981677030566532),
 ('colombian', 0.7982841129790053),
 ('turkish or venice', 0.7984449513195512),
 ('scottish', 0.7984974270206187),
 ('phrygian', 0.7986802895687276),
 ('brescia', 0.7992467113817746),
 ('cleopatra', 0.7993280444626168),
 ('mary magdalene', 0.7994401621750854),
 ('macao', 0.8003972289195231),
 ('mosques', 0.802059696502562),
 ('hermes', 0.8021684931674349),
 ('tabernacles', 0.8022814666456851),
 ('geneva', 0.802501311857982),
 ('turkish', 0.8026114594037179),
 ('centaurs', 0.8032061768100855),
 ('emblems', 0.8048105160775728),
 ('caucasian', 0.8050971119373609),
 ('medea', 0.8063474116214112),
 ('leda', 0.8066382531046452),
 ('chariots', 0.8067858456010323),
 ('ferrara', 0.8073510423192105),
 ('scorpions', 0.8076407483545821),
 ('dresden', 0.8079794602745345),
 ('delft', 0.8080345358831489),
 ('huron', 0.8080401901305294),
 ('copenhagen', 0.8081131964823751),
 ('moche', 0.8082226388966303),
 ('crescents', 0.8086863815402342),
 ('afghan', 0.808996953990196),
 ('daggers', 0.8095012643746016),
 ('cambodia', 0.8098363000255302),
 ('inscriptions', 0.8105605548913086),
 ('vulcan', 0.8115415556898757),
 ('ceres', 0.8117403279219317),
 ('scrolls', 0.8121256367680152),
 ('czech', 0.8138079612094874),
 ('zodiac', 0.8142598193107853),
 ('inca', 0.8146770352548911),
 ('furstenberg', 0.8157267546226014),
 ('esther', 0.8160322512909735),
 ('rosaries', 0.8161132868448687),
 ('pediments', 0.8163120084971026),
 ('hilts', 0.8166805605832869),
 ('urbino', 0.8170076978350389),
 ('tombs', 0.8175559526919526),
 ('piedmont', 0.818129188386959),
 ('deccan', 0.8185274855396414),
 ('contemplation', 0.8187449094735377),
 ('etruria', 0.8201474879412689),
 ('san sabastian', 0.8203763186076843),
 ('acanthus', 0.8204032922243241),
 ('savoy', 0.8207688883958189),
 ('leopards', 0.8208364467269469),
 ('friezes', 0.8211295934963742),
 ('versailles', 0.8211987503481594),
 ('temples', 0.8214557492227669),
 ('flemish or italian', 0.8216552238098075),
 ('sri lankan', 0.8216996261782611),
 ('krishna', 0.8218503011701738),
 ('netsuke', 0.822605807355557),
 ('sarcophagus', 0.82271533350462),
 ('munich', 0.822908140930828),
 ('cauldrons', 0.822936302252579),
 ('umbria', 0.8235640049205483),
 ('tibet', 0.824118395322325),
 ('bible', 0.8261153013478844),
 ('north netherlandish', 0.8263595024424826),
 ('milan', 0.8270335546860109),
 ('christian imagery', 0.8274392361992018),
 ('armors', 0.8287563022397216),
 ('abbies', 0.82922955482136),
 ('south netherlandish', 0.8300721332455765),
 ('rome', 0.8302629742456646),
 ('staffordshire', 0.8304867325486913),
 ('princes', 0.8310723349985154),
 ('valencia', 0.8313486375440409),
 ('catalonia', 0.8333418993212622),
 ('pomegranates', 0.8336758566391732),
 ('worshiping', 0.8338186912365861),
 ('goddess', 0.8352606300368342),
 ('bordeaux', 0.8359876607362065),
 ('saint jerome', 0.8362797736365979),
 ('tunics', 0.8364458234778986),
 ('judith', 0.837007512061159),
 ('tapestries', 0.8372093245629693),
 ('admirals', 0.8373762947377036),
 ('landforms', 0.8384861603917599),
 ('swedish', 0.8389972134414563),
 ('muses', 0.8396165429096404),
 ('east greek', 0.8396439989658571),
 ('lydian', 0.8399034563150146),
 ('pyramids', 0.8400297859773738),
 ('british or scottish', 0.8402898049988674),
 ('lockets', 0.8404276521082368),
 ('cathedrals', 0.841751992436574),
 ('monks', 0.8425658246029407),
 ('religious texts', 0.8438900308223398),
 ('avon', 0.8443883081439882),
 ('chaumont-sur-loire', 0.844809841521296),
 ('apocalypse', 0.845593807545371),
 ('villeroy', 0.8459820720458517),
 ('eastern mediterranean', 0.8466053904438158),
 ('maya', 0.8478525457380581),
 ('emperor augustus', 0.8481918646717542),
 ('irish', 0.8492636412727065),
 ('saints', 0.8499972444616761),
 ('indonesia', 0.8503352836370651),
 ('eastern mediterranean or italian', 0.8532472648914555),
 ('lutes', 0.853913961233348),
 ('caves', 0.853922156318311),
 ('coffeepots', 0.8539345279541817),
 ('amazons', 0.8543894648109811),
 ('palaces', 0.8546844081722265),
 ('seville', 0.8549100165237926),
 ('demons', 0.8549333554820853),
 ('ica', 0.8556226051304621),
 ('pilasters', 0.8562605163402239),
 ('europa', 0.8564616707993674),
 ('acrobats', 0.8565712760363174),
 ('greek figures', 0.8573234566373543),
 ('manuscripts', 0.8575611848587732),
 ('watermills', 0.8581268723052828),
 ('juno', 0.8586158053373847),
 ('amsterdam', 0.8594411065319584),
 ('cupid', 0.859764990241314),
 ('candelabra', 0.8609404182168732),
 ('funerals', 0.8623100550854765),
 ('greek islands', 0.8624377682988915),
 ('abraham', 0.8625665615072979),
 ('seascapes', 0.8628410463118157),
 ('necktie', 0.8636835495859028),
 ('castel durante', 0.864122116831218),
 ('belgian', 0.8644953461653272),
 ('tureens', 0.8649411397692431),
 ('courtyards', 0.8660112943386454),
 ('arrowheads', 0.8660991171639257),
 ('entombment', 0.8664652756204685),
 ('diana', 0.8668891905887253),
 ('limoges', 0.8670707937192056),
 ('undergarment', 0.8682251583641309),
 ('buffalos', 0.8683422473363366),
 ('doorways', 0.8695274225843213),
 ('foxes', 0.8706018051620608),
 ('drunkenness', 0.8710004933513564),
 ('florence', 0.8713587321777465),
 ('portuguese', 0.8715056373719962),
 ('pakistan', 0.8717783498721794),
 ('madrid', 0.872157864083756),
 ('coverlets and quilts', 0.8727496204544176),
 ('prisms', 0.8730848668464093),
 ('venice', 0.873297653560926),
 ('louis xiv', 0.8734984863736766),
 ('adam', 0.8738198904833596),
 ('kings', 0.874605498255205),
 ('venus', 0.8747125570158157),
 ('snakes', 0.875178466671527),
 ('putti', 0.8757550330270452),
 ('old assyrian trading colony', 0.8763130744655202),
 ('kathmandu valley', 0.8763296207284265),
 ('globes', 0.8763593325450983),
 ('devil', 0.8771258405782583),
 ('shepherds', 0.8779259527740803),
 ('dublin', 0.8780021324902001),
 ('saint matthew', 0.8783050047047956),
 ('fathers', 0.8787667455058651),
 ('fluting', 0.879311455781323),
 ('boars', 0.8795000087599439),
 ('hexagons', 0.8798636170526895),
 ('candlesticks', 0.8800237848973761),
 ('poets', 0.880649274241473),
 ('nymphs', 0.8806711215315552),
 ('bes', 0.8815428697255453),
 ('peacocks', 0.8815458165094715),
 ('bohemian', 0.8821749530664243),
 ('for iberian market', 0.882440657288194),
 ('tricorns', 0.8828238660681426),
 ('brunswick', 0.8833891476560396),
 ('monuments', 0.8847700494534172),
 ('berlin', 0.8850865264842193),
 ('philippine', 0.8854724782484904),
 ('paisley', 0.8856145827310873),
 ('self-portraits', 0.8859171598298935),
 ('mark antony', 0.8859314095840526),
 ('funerary objects', 0.8862286593258051),
 ('psyche', 0.8863515842253616),
 ('vienna', 0.8864354828821277),
 ('st. petersburg', 0.8864721353542864),
 ('genoa', 0.8865005178032936),
 ('saint john the baptist', 0.886939662767648),
 ('coffins', 0.8871332703829625),
 ('satire', 0.8873683781927586),
 ('saint catherine', 0.887542585823649),
 ('insignia', 0.8879089058172074),
 ('nativity', 0.8883272422637423),
 ('tigers', 0.8883926692371719),
 ('bologna', 0.8883946177147568),
 ('saint joseph', 0.8889195062331945),
 ('astronomy', 0.8889710994200541),
 ('italic', 0.889441950062626),
 ('easter', 0.8899049866042229),
 ('deserts', 0.8910387226260826),
 ('dutch', 0.8927821089721718),
 ('triton', 0.8929446450063414),
 ('goblets', 0.8935899070988711),
 ('mythical creatures', 0.8939915343748653),
 ('dragons', 0.8940640144395988),
 ('bishops', 0.8944624060389592),
 ('robes', 0.8944869447451718),
 ('thailand', 0.8951297999035447),
 ('vietnam', 0.8954079759605597),
 ('european', 0.895650381950713),
 ('acorns', 0.8961267259275609),
 ('russian', 0.8969813325930728),
 ('urns', 0.8971471632837624),
 ('medallions', 0.898079598444227),
 ('longton hall', 0.898230016915003),
 ('korea', 0.8996658197317151),
 ('liturgical objects', 0.8999726543518423),
 ('chinese', 0.9001301084215936),
 ('saint francis', 0.9007018074247596),
 ('benjamin franklin', 0.90114908500748),
 ('german', 0.9012187338832611),
 ('princesses', 0.9017840402251487),
 ('donkeys', 0.9020567411516587),
 ('saint john the evangelist', 0.9021016869923446),
 ('costa rica or panama', 0.9024682970116554),
 ('garlands', 0.9026890078688765),
 ('churches', 0.902865643497621),
 ('worcester', 0.9030221989981148),
 ('coromandel coast', 0.9032821594152903),
 ('burials', 0.9034802922450151),
 ('peonies', 0.9035290950746766),
 ('birmingham', 0.9041453092854855),
 ('brussels', 0.9041650363808517),
 ('falcons', 0.904309572014076),
 ('hilt', 0.9049819167485453),
 ('chinese with dutch decoration', 0.9050144770783775),
 ('castles', 0.9052984698524318),
 ('pendants', 0.9055279295811565),
 ('swords', 0.9073726800128596),
 ('napoleon i', 0.9076148311989625),
 ('generals', 0.9077026259664257),
 ('nonrepresentational art', 0.9077158676714411),
 ('nike', 0.9081427001378289),
 ('chinese with european decoration', 0.9087518164822729),
 ('saint lawrence', 0.9093006172172359),
 ('chantilly', 0.9095169716527808),
 ('ladles', 0.9103888336748563),
 ('anklet', 0.9108090961403023),
 ('scarves', 0.9112829622644516),
 ('british', 0.9115840992626324),
 ('eastern european', 0.9127909524602843),
 ('villages', 0.9128748040142869),
 ('tulips', 0.913532828241085),
 ('chelsea', 0.914618223986467),
 ('india', 0.9150649350393798),
 ('harps', 0.9159259389899387),
 ('fairies', 0.9166005074467314),
 ('liverpool', 0.9166561928565793),
 ('camels', 0.9167705031016418),
 ('literature', 0.9169911688666416),
 ('sunflowers', 0.9179387733452782),
 ('incense burners', 0.9189735708207503),
 ('saint michael', 0.9197863085679588),
 ('arches', 0.9198520379770192),
 ('italian', 0.9200607992061274),
 ('capitals', 0.920165984550291),
 ('corpses', 0.9210196049873733),
 ('surrey', 0.92206837111944),
 ('crocodiles', 0.922090410108496),
 ('couches', 0.9229342832334522),
 ('american or european', 0.9239226972016794),
 ('angels', 0.9243267998994693),
 ('fortification', 0.9245443336423829),
 ('assumption of the virgin', 0.9249697705436619),
 ('saint peter', 0.924986037842667),
 ('orleans', 0.925049134129623),
 ('german or swiss', 0.9256403363896128),
 ('ruins', 0.9260176959182751),
 ('saint anthony', 0.9265260987666019),
 ('occupations', 0.9266666681602538),
 ('cradles', 0.9277846330328545),
 ('sheffield', 0.9290299972031711),
 ('volcanoes', 0.9290884174622724),
 ('commodes', 0.92957865333047),
 ('skulls', 0.9305690742844929),
 ('domes', 0.9306581013473678),
 ('bagpipes', 0.930787028285848),
 ('lizards', 0.9314935326727515),
 ('vases', 0.931622127766755),
 ('costa rica', 0.9317635606085299),
 ('barns', 0.9319647090672248),
 ('lions', 0.9325443086896084),
 ('pinecones', 0.9342417063310953),
 ('triangles', 0.9345210720839143),
 ('england', 0.935023971661036),
 ('windmills', 0.9351500723003651),
 ('celestial bodies', 0.9352358024109242),
 ('circles', 0.935980741515506),
 ('calendars', 0.9364369973402861),
 ('saint paul', 0.9369656267331662),
 ('roosters', 0.937091478367968),
 ('carpets and rugs', 0.9378458002017382),
 ('italian or spanish', 0.9382572540229317),
 ('snails', 0.9400961110517512),
 ('poetry', 0.9401539252725175),
 ('brooches', 0.9406350726860371),
 ('saint anne', 0.941499756337813),
 ('mandolins', 0.9417732477415864),
 ('bristol', 0.9425229444561125),
 ('japan', 0.9431429822147931),
 ('portraits', 0.9438600510664965),
 ('skeletons', 0.9440353557043863),
 ('towers', 0.9443919358086579),
 ('prostitutes', 0.9450547527343605),
 ('corsets', 0.9460078836181744),
 ('rectangles', 0.9460187726976098),
 ('saint george', 0.946143199494926),
 ('doves', 0.9479059765445113),
 ('wells', 0.9501768041299219),
 ('abstraction', 0.9507360517261481),
 ('mexican', 0.951014755709615),
 ('candles', 0.9510300959280846),
 ('virgin mary', 0.9526825797700411),
 ('facades', 0.9527337836069217),
 ('atlantic watershed', 0.9527828157731162),
 ('northern european', 0.9529448217375662),
 ('edinburgh', 0.9529828620339755),
 ('lambs', 0.9530887470492585),
 ('vase fragments', 0.9530970051862961),
 ('fireplaces', 0.9533857371321843),
 ('vests', 0.9541899904818081),
 ('navy', 0.9547914369970716),
 ('fountains', 0.9547925343268296),
 ('lamps', 0.9552992503857043),
 ('italic-native', 0.9558317533513137),
 ('cats', 0.9560713886777245),
 ('northern india', 0.956109140848712),
 ('western european', 0.9569138461063005),
 ('chests', 0.9573577406961384),
 ('teapots', 0.9573793638339184),
 ('curtains', 0.9575361472771694),
 ('cornucopia', 0.9576076438410726),
 ('architectural fragments', 0.958515392054028),
 ('octopus', 0.9586793501568903),
 ('sadness', 0.9588705878362737),
 ('slavery', 0.959867555581284),
 ('rams', 0.9604937170317495),
 ('holy family', 0.9605867123597104),
 ('beakers', 0.9612952032591641),
 ('clouds', 0.9615664895352236),
 ('american', 0.9618219770953362),
 ('wreaths', 0.9619991935112671),
 ('infants', 0.9620594695671747),
 ('eagles', 0.9624552565536089),
 ('mice', 0.9624985319833094),
 ('northern italian', 0.9627240626384582),
 ('southern german', 0.9639236563510428),
 ('south german', 0.964091135435569),
 ('massacres', 0.9643329006404922),
 ('goats', 0.9644090166943831),
 ('skirts', 0.9651035306479016),
 ('david', 0.9651279421321256),
 ('saint-cloud', 0.9652096096993058),
 ('for danish market', 0.9654660396298775),
 ('saint mark', 0.965686724012054),
 ('waterfalls', 0.9659749874955439),
 ('burial grounds', 0.9660568512493182),
 ('spanish', 0.9661055788700579),
 ('stripes', 0.9662713961618848),
 ('mothers', 0.9664588267466988),
 ('ornament', 0.9665159071354937),
 ('trumpets', 0.9670370891694734),
 ('hawks', 0.9671939050217357),
 ('daisies', 0.9689425086896141),
 ('north indian', 0.969042096418155),
 ('swiss', 0.9692921294031768),
 ('tents', 0.9701993732053612),
 ('central highlands', 0.9705236178683427),
 ('london', 0.9710212206825054),
 ('axes', 0.9710521563279098),
 ('mountains', 0.9716404281697651),
 ('bulls', 0.9719839468803801),
 ('french or german', 0.972087612429199),
 ('pigeons', 0.9723543608554571),
 ('christmas', 0.9731039263169626),
 ('rabbits', 0.9731350832300756),
 ('rings', 0.9732630556323909),
 ('vines', 0.973506618673006),
 ('northwest china/eastern central asia', 0.9736304134875489),
 ('queens', 0.9740863089873448),
 ('sailors', 0.9744295941836381),
 ('shields', 0.9745950179112538),
 ('religious events', 0.9747695499931316),
 ('ladders', 0.9749079371188678),
 ('north italian', 0.9749867158284286),
 ('nero', 0.9751352807629716),
 ('mercury', 0.9752899643822704),
 ('central asia', 0.9753064765619343),
 ('squirrels', 0.9758974291773187),
 ('landscapes', 0.9761429391949646),
 ('hills', 0.9762702794001665),
 ('servants', 0.976424733632674),
 ('geometric patterns', 0.976738344403618),
 ('south italian', 0.9773825444862688),
 ('attic', 0.9776410447226332),
 ('chelsea-derby', 0.9790281754858529),
 ('british or french', 0.9793841624453111),
 ('punishment', 0.9795572186741395),
 ('interiors', 0.9804968798958192),
 ('central european', 0.980748789085002),
 ('arrows', 0.9809563093993061),
 ('monsters', 0.9810377104722964),
 ('seas', 0.9814949841666286),
 ('owls', 0.9825039007381653),
 ('ceremony', 0.9826254866178583),
 ('french or italian', 0.9833080038610111),
 ('kettles', 0.9837109391740411),
 ('eve', 0.9838778846601387),
 ('old testament', 0.9840251495147705),
 ('circus', 0.9841202312137143),
 ('chess', 0.9843618176394737),
 ('billiards', 0.9846630652812779),
 ('sky', 0.9856639625931792),
 ('paris', 0.9857764686649709),
 ('textiles', 0.9871868160417485),
 ('fire', 0.9877488709276694),
 ('saucers', 0.9877517431500844),
 ('frogs', 0.9879620316608736),
 ('textile fragments', 0.9885561411928273),
 ('necklaces', 0.9901929871575991),
 ('lobsters', 0.9902434293531995),
 ('squares', 0.9905313811306382),
 ('spears', 0.9905865671394127),
 ('central italian', 0.9907025594360142),
 ('hell', 0.990915394837384),
 ('polka-dot pattern', 0.9915776524300036),
 ('architects', 0.9916165128725002),
 ('northwest china', 0.9917636645335612),
 ('autumn', 0.9920181171150048),
 ('moon', 0.9922364694657709),
 ('dawn', 0.9933328869785198),
 ('canals', 0.9933776309688285),
 ('george washington', 0.9934296888569358),
 ('bracelets', 0.9936111543465979),
 ('diamonds', 0.9941274995527678),
 ('inns', 0.9942957680033438),
 ('railways', 0.9948001481483886),
 ('jockeys', 0.9948848681457915),
 ('cities', 0.9952764601115516),
 ('monkeys', 0.9954648043410916),
 ('architecture', 0.9956753325617115),
 ('pheasants', 0.9960904329579349),
 ('sword guards', 0.9961826429011661),
 ('towns', 0.9972911105846274),
 ('tablets', 0.9973689656862836),
 ('pigs', 0.9973731058065095),
 ('bathing', 0.9974106592357704),
 ('for swedish market', 0.9985053108838066),
 ('la rochelle', 0.9986789052829207),
 ('historical figures', 0.9988643483795253),
 ('jason', 0.9992660464781731),
 ('gardeners', 0.9995646640329642),
 ('colonial american', 0.9996662612183366),
 ('olive trees', 0.9999646768457398),
 ('storms', 0.9999908685377091),
 ('gates', 1.0000442883535858),
 ('basins', 1.0002123171137338),
 ('european bronze age', 1.0002523005158734),
 ('insects', 1.0007348558953841),
 ('prisons', 1.0008871728415367),
 ('earrings', 1.0010554080751777),
 ('cameos', 1.0021604944964937),
 ('peaches', 1.0021730391723498),
 ('male nudes', 1.0035863665032656),
 ('swans', 1.0042988615420785),
 ('actresses', 1.0046086271901915),
 ('utilitarian objects', 1.005104289868325),
 ('embroidery', 1.0052223781141658),
 ('dice', 1.005230108752987),
 ('bears', 1.0053591588408415),
 ('clocks', 1.0056523376514162),
 ('female nudes', 1.006079441565769),
 ('chinese with french mounts', 1.0061397819813245),
 ('bamboo', 1.0078202061012396),
 ('rivers', 1.0083277706301261),
 ('anger', 1.0088416198755294),
 ('wars', 1.0089213020681695),
 ('coins', 1.0093196717172082),
 ('french or swiss', 1.0093683572135643),
 ('sun', 1.009396304220885),
 ('cannons', 1.009861639877762),
 ('telescopes', 1.0103044644345274),
 ('forests', 1.0106297566151423),
 ('lakes', 1.0108402667372134),
 ('roses', 1.0110559776773864),
 ('zigzag pattern', 1.0114910694719643),
 ('china', 1.0121752765329466),
 ('carriages', 1.0129267599537533),
 ('illness', 1.0131523175906376),
 ('jackets', 1.0140657523499668),
 ('elephants', 1.0146102373800392),
 ('parrots', 1.0146827648899113),
 ('architectural elements', 1.0147556000670264),
 ('dresses', 1.01516004057645),
 ('correspondence', 1.01551678275415),
 ('fear', 1.0157622369647827),
 ('theatre', 1.015822284205109),
 ('jewelry', 1.0160765270952705),
 ('singers', 1.01643980637903),
 ('new testament', 1.0164501123060743),
 ('merchants', 1.0165765804912914),
 ('grapes', 1.016743773895792),
 ('jugs', 1.0168580922814028),
 ('stools', 1.0172907099125317),
 ('madonna and child', 1.0175041559947573),
 ('for portuguese market', 1.0180482847566323),
 ('maps', 1.0188547217554977),
 ('bobbins', 1.0191034592467407),
 ('dancers', 1.0195483739607583),
 ('organs', 1.019808443201872),
 ('bees', 1.0203014995364341),
 ('badges', 1.0203501293012147),
 ('alexander the great', 1.0209293251211835),
 ('after german', 1.02333926307),
 ('ponds', 1.0235382279203264),
 ('sculpture', 1.0239054207351865),
 ('weapons', 1.024414709470906),
 ('plants', 1.0247278379130838),
 ('lovers', 1.0249478373349088),
 ('seals', 1.0257750079901373),
 ('for russian market', 1.0257869009758604),
 ('prisoners', 1.0259697994995005),
 ('purses', 1.0260008563061034),
 ('buckles', 1.0261159797473463),
 ('shirts', 1.0281920498826522),
 ('animals', 1.0282879671615766),
 ('footwear', 1.0284101109452901),
 ('uniforms', 1.028583832442363),
 ('army', 1.0295522908187256),
 ('dolphins', 1.029772819954807),
 ('trees', 1.0299998660087921),
 ('egg and dart', 1.0302269609020986),
 ('pitchers', 1.0303097991501837),
 ('wagons', 1.0304549708570456),
 ('flutes', 1.0306990492824695),
 ('cross', 1.0307043823068098),
 ('masks', 1.0307966843418306),
 ('gardens', 1.0318492063782174),
 ('butterflies', 1.0320833295460663),
 ('turtles', 1.032359120022032),
 ('battles', 1.032618773245644),
 ('asia minor', 1.0327674702097913),
 ('benches', 1.0329307647854944),
 ('lighting', 1.0331836573333946),
 ('cows', 1.0334409387659926),
 ('death', 1.0334981382427555),
 ('crabs', 1.0336802981966469),
 ('north china', 1.033789074768367),
 ('colonial', 1.0337932747842156),
 ('bakers', 1.0338036945051479),
 ('london original', 1.033832548401572),
 ('flags', 1.034778396441433),
 ('couples', 1.0352023167531934),
 ('rain', 1.035398274218906),
 ('after british', 1.0354657944141443),
 ('violas', 1.0355271634093726),
 ('polish', 1.035737946140454),
 ('apples', 1.0360455731232403),
 ('for european market', 1.0365027093976542),
 ('bats', 1.0367479366861119),
 ('men', 1.0369683446439983),
 ('journals', 1.037277196646489),
 ('wind', 1.0372848472169018),
 ('gingham pattern', 1.0375045869838977),
 ('coats', 1.0383434566890946),
 ('kitchens', 1.0386172352754708),
 ('women', 1.038746110533446),
 ('after italian', 1.039240078097833),
 ('soldiers', 1.039731389219537),
 ('columns', 1.0410521083440114),
 ('houses', 1.0422452117660512),
 ('balconies', 1.0422923511410118),
 ('jars', 1.0438018425524955),
 ('for british market', 1.0441417701110944),
 ('spectators', 1.0443865254170905),
 ('suffering', 1.0444597983520176),
 ('flowers', 1.045133541929204),
 ('fireworks', 1.0455714405191798),
 ('costumes', 1.0459056585125968),
 ('military clothing', 1.0460214821730076),
 ('decorative designs', 1.0467324119888974),
 ('sheep', 1.0477721018421173),
 ('factories', 1.0487814907228161),
 ('french', 1.0492629271961305),
 ('chickens', 1.0499642672791805),
 ('beads', 1.0504397555691354),
 ('boots', 1.0508356651884292),
 ('cutlery', 1.0509715206103476),
 ('dogs', 1.0517806470142952),
 ('helmets', 1.052264749686713),
 ('cranes', 1.0525942998788964),
 ('military', 1.0526678325706371),
 ('smoking', 1.0529535940888153),
 ('derby', 1.0540602627008333),
 ('descent from the cross', 1.0546952762223798),
 ('after russian original', 1.0551944424645052),
 ('knives', 1.055224353238073),
 ('pants', 1.055784677250698),
 ('screens', 1.055992705066103),
 ('books', 1.0565309055528762),
 ('winter', 1.0565790898331637),
 ('united states', 1.0566636058839731),
 ('hammers', 1.0566668561681634),
 ('beaches', 1.056764235474413),
 ('buildings', 1.0570358148500694),
 ('desks', 1.0573873707248405),
 ('hats', 1.0575114562293864),
 ('buildings and structures', 1.0582839002602822),
 ('dancing', 1.058788075480073),
 ('last supper', 1.0593848645330481),
 ('decorative elements', 1.0595789351876295),
 ('trophies', 1.0597553220458096),
 ('after german original', 1.0597971779387347),
 ('hospitals', 1.0600804911244182),
 ('unknown', 1.0605130124044426),
 ('dance', 1.0608004274922027),
 ('snow', 1.061845860506999),
 ('doctors', 1.061956336083128),
 ('mars', 1.0620807061107969),
 ('bedrooms', 1.0624179125743065),
 ('teachers', 1.0627142707151076),
 ('furniture', 1.063250546752538),
 ('last judgement', 1.0632901901527025),
 ('feathers', 1.0640857295205213),
 ('stairs', 1.0641793577717371),
 ('doors', 1.0642707167871208),
 ('waves', 1.0646964485450827),
 ('shells', 1.0661511995098085),
 ('feet', 1.067117854909622),
 ('eyes', 1.067801830167047),
 ('bowls', 1.068479361664221),
 ('suits', 1.0685992380314422),
 ('pocket watches', 1.068694329508793),
 ('dolls', 1.0718219049134856),
 ('farms', 1.0722897752554383),
 ('human figures', 1.0723229602605875),
 ('shoes', 1.0724246228161884),
 ('concerts', 1.0736261630982407),
 ('pianos', 1.0742993939765508),
 ('bodies of water', 1.0743233596856334),
 ('watches', 1.0745513745844308),
 ('provincial', 1.0753047048869504),
 ('fruit', 1.0753245820660327),
 ('firearms', 1.0753791168981723),
 ('for american market', 1.075778690964652),
 ('rowing', 1.0759820947610321),
 ('keys', 1.076076906194941),
 ('girls', 1.0762020188916934),
 ('ducks', 1.0766325519311815),
 ('reading', 1.0774220649214032),
 ('design elements', 1.077866492613092),
 ('civil war', 1.0781124116433611),
 ('world war i', 1.0782040669180752),
 ('bicycles', 1.079244673188273),
 ('beds', 1.0796468787449274),
 ('musicians', 1.0797708386886706),
 ('vegetables', 1.0797717167368663),
 ('deer', 1.0801235165214615),
 ('lace', 1.0801314766366228),
 ('wine', 1.0802959882751515),
 ('mirrors', 1.0804533111177217),
 ('roads', 1.080929492069357),
 ('bow and arrow', 1.0810993624929877),
 ('belts', 1.0813000933255132),
 ('opera', 1.0815064098128966),
 ('clothing and accessories', 1.0821176972846551),
 ('boxing', 1.0821217649658075),
 ('air transports', 1.0829294098733315),
 ('genre scene', 1.0834613093379175),
 ('coat of arms', 1.0836557547473606),
 ('windows', 1.0842345879654862),
 ('schools', 1.0846464343109017),
 ('violins', 1.084938087796499),
 ('boats', 1.0851159448760963),
 ('tea caddy', 1.0853115071087167),
 ('tables', 1.0861717615853792),
 ('documents', 1.0863235108148679),
 ('bridges', 1.0881516693571043),
 ('boys', 1.0882076678429236),
 ('seating furniture', 1.0897768119456084),
 ('trays', 1.0903884681474),
 ('drawing', 1.0905598406369785),
 ('fish', 1.0909029964842352),
 ('law', 1.0922477109378712),
 ('horns', 1.0927320304304984),
 ('dishes', 1.0936627288315612),
 ('spring', 1.0940789743052697),
 ('streets', 1.094293844171938),
 ('docks', 1.0945628480123697),
 ('cabinets', 1.09614070212697),
 ('birds', 1.097479907920503),
 ('chairs', 1.0979438033765916),
 ('painting', 1.0980184656941272),
 ('washing', 1.1004152396906446),
 ('baseball', 1.1015824297606724),
 ('actors', 1.1025752641314233),
 ('pins', 1.1034334337587337),
 ('parks', 1.1039466133649138),
 ('games', 1.1043198453651581),
 ('athletes', 1.1052616630056395),
 ('artists', 1.1054906910165305),
 ('drinking glasses', 1.1057018520466049),
 ('farmers', 1.1061412642862694),
 ('victory', 1.107042699196357),
 ('sports', 1.1073696426748045),
 ('gloves', 1.1087526110384798),
 ('for continental market', 1.109968309205741),
 ('horses', 1.1108988846564951),
 ('family', 1.1111165652623605),
 ('dining', 1.1126894936522482),
 ('evening', 1.113832152088094),
 ('weights and measures', 1.115716646154362),
 ('drinking', 1.1162029067632777),
 ('sleep', 1.1167069425402016),
 ('fishing', 1.117765288586614),
 ('writing', 1.1179813499245481),
 ('percussion instruments', 1.1187531255956649),
 ('leaves', 1.1201310247152156),
 ('profiles', 1.1206404696975931),
 ('hunting', 1.1209506694629408),
 ('children', 1.1210332544466695),
 ('samples', 1.1222275238069108),
 ('walking', 1.1231090481084842),
 ('musical instruments', 1.124022715399212),
 ('party', 1.125498210748638),
 ('writing implements', 1.1263423974190634),
 ('bow', 1.12696450413644),
 ('guitars', 1.1276205051558872),
 ('toys', 1.1287827670593364),
 ('stars', 1.1296191069131114),
 ('hands', 1.1298506173264875),
 ('summer', 1.1300565548866928),
 ('hair', 1.1303386881083433),
 ('heads', 1.1306583831558488),
 ('singing', 1.1315872078337688),
 ('storage furniture', 1.133052751126791),
 ('bottles', 1.133250448566519),
 ('for french market', 1.1365122729658084),
 ('living rooms', 1.1365756578465955),
 ('faces', 1.1381866470904314),
 ('crowd', 1.138294068714858),
 ('fans', 1.1393067150642235),
 ('transportation', 1.1406006081432751),
 ('body parts', 1.1406788336161726),
 ('ships', 1.1409690784292332),
 ('tea drinking', 1.141117375343958),
 ('writing systems', 1.1417147823720977),
 ('buttons', 1.1419228714662155),
 ('love', 1.1428515679563023),
 ('military equipment', 1.146170647839414),
 ('cups', 1.1472207693312266),
 ('markets', 1.147488651766101),
 ('trains', 1.1484284493644679),
 ('daily life', 1.1503854018071156),
 ('drums', 1.1506121383585262),
 ('still life', 1.1516735853126976),
 ('street scene', 1.1523397077347264),
 ('music', 1.1533998798306004),
 ('students', 1.1540079068813867),
 ('food', 1.154210792623875),
 ('night', 1.1596295435778217),
 ...]
In [15]:
# LABELS
labels = pd.read_csv(LABELS)
labels_attribute_id = [int(ids) for ids in labels.attribute_id]
labels_attribute_name = [name.replace("culture::", "").replace("tag::", "") for name in labels.attribute_name]
ids_2_names = dict(zip(list(labels_attribute_id), list(labels_attribute_name)))
names_2_ids = dict(zip(list(labels_attribute_name), list(labels_attribute_id)))

# TRAIN
train = pd.read_csv(TRAIN)
train_ids = train.id
train_attribute_ids = [   [int(i) for i in img_string.split(" ")]   for img_string in train.attribute_ids]
train_attribute_name = [   [ids_2_names[id]for id in ids]   for ids in train_attribute_ids]

# fake embeddings for NaN targets
for name in names_2_ids.keys():
    if name not in embedding_matrix.keys():
        embedding_matrix[name] = np.zeros(300).tolist()

train_attribute_embed = [   (np.array([embedding_matrix[name] for name in names]).sum(axis=0)/15).tolist()    for names in train_attribute_name]
train_attribute_name[:2]
Out[15]:
[['french', 'dogs', 'men'], ['british', 'dogs', 'horses', 'men']]
In [16]:
train_attribute_embed = [   (np.array([embedding_matrix[name] for name in names]).sum(axis=0)/15).tolist()    for names in train_attribute_name]



# ids_2_embed = dict(zip(list(train_attribute_ids), list(train_attribute_embed)))
# train_attribute_name[:2], train_attribute_embed[:2]
In [17]:
import cv2

%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

def find_similar_image(target, train_attribute_ids, train_attribute_embed):
    target_embed = train_attribute_embed[train_ids.values.tolist().index(target)]
    scores = []
    ids = []
    for i, embed in enumerate(train_attribute_embed):
        ids.append(train_ids[i])
        scores.append(cosine(embed, target_embed))
    dic = dict(zip(ids, scores))
    sort = sorted(dic.items(), key=operator.itemgetter(1))
    return dict(sort)

# def show_images(list_images_path):
#     from IPython.display import Image, display
#     for imageName in list_images_path:
#         display(Image(filename=imageName))

# credit: https://gist.github.com/soply/f3eec2e79c165e39c9d540e916142ae1
def show_images(list_images_path, cols = 1, titles = None):
    """Display a list of images in a single figure with matplotlib.
    
    Parameters
    ---------
    images: List of np.arrays compatible with plt.imshow.
    
    cols (Default = 1): Number of columns in figure (number of rows is 
                        set to np.ceil(n_images/float(cols))).
    
    titles: List of titles corresponding to each image. Must have
            the same length as titles.
    """
    images = [cv2.imread(img) for img in list_images_path]
    assert((titles is None)or (len(images) == len(titles)))
    n_images = len(images)
    if titles is None: titles = ['Image (%d)' % i for i in range(1,n_images + 1)]
    fig = plt.figure()
    for n, (image, title) in enumerate(zip(images, titles)):
        a = fig.add_subplot(cols, np.ceil(n_images/float(cols)), n + 1)
        if image.ndim == 2:
            plt.gray()
        plt.imshow(image)
        a.set_title(title)
    fig.set_size_inches(np.array(fig.get_size_inches()) * n_images)
    plt.show()

# print(find_similar_image("1000483014d91860", train_attribute_ids, train_attribute_embed))
dic = find_similar_image("1000483014d91860", train_attribute_ids, train_attribute_embed)
show_images([TRAIN_IMG.format(img) for img in list(dic.keys())[:10]], cols=2, titles=list(dic.values())[:10])
/opt/conda/lib/python3.6/site-packages/scipy/spatial/distance.py:698: RuntimeWarning: invalid value encountered in double_scalars
  dist = 1.0 - uv / np.sqrt(uu * vv)
In [18]:
show_images([TRAIN_IMG.format(img) for img in list(dic.keys())[10:20]], cols=2, titles=list(dic.values())[10:20])
In [19]:
dic = find_similar_image("101c8394ff6db02d", train_attribute_ids, train_attribute_embed)
print(list(dic.values())[:10])
show_images([TRAIN_IMG.format(img) for img in list(dic.keys())[:10]], cols=2, titles=list(dic.values())[:10])
/opt/conda/lib/python3.6/site-packages/scipy/spatial/distance.py:698: RuntimeWarning: invalid value encountered in double_scalars
  dist = 1.0 - uv / np.sqrt(uu * vv)
[0.0, 0.10563241507206145, 0.21873428958227614, 0.21873428958227614, 0.21873428958227614, 0.21873428958227614, 0.2323020536851219, 0.23603991832177673, 0.23603991832177673, 0.23603991832177673]