2.bcc 156.7 KB
Newer Older
W
wizardforcel 已提交
1
{"font_size":0.4,"font_color":"#FFFFFF","background_alpha":0.5,"background_color":"#9C27B0","Stroke":"none","body":[{"from":4.59,"to":8.22,"location":2,"content":"okay hello everyone and welcome back to"},{"from":8.22,"to":13.5,"location":2,"content":"the second class of cs2 24in okay so"},{"from":13.5,"to":16.11,"location":2,"content":"right at the end of last time I was just"},{"from":16.11,"to":19.41,"location":2,"content":"showing you a little from this ipython"},{"from":19.41,"to":21.09,"location":2,"content":"notebook of things that you could do"},{"from":21.09,"to":22.95,"location":2,"content":"with word vectors but I kind of ran out"},{"from":22.95,"to":26.46,"location":2,"content":"of time a little bit so I'll just spend"},{"from":26.46,"to":28.35,"location":2,"content":"a couple of more minutes first I'm"},{"from":28.35,"to":30.45,"location":2,"content":"showing the end of this I stuck this"},{"from":30.45,"to":32.88,"location":2,"content":"ipython notebook up on the course page"},{"from":32.88,"to":35.31,"location":2,"content":"so under lecture 1 you can find a copy"},{"from":35.31,"to":38.01,"location":2,"content":"of it and you can download it so I both"},{"from":38.01,"to":40.44,"location":2,"content":"stuck up just an HTML version of it and"},{"from":40.44,"to":43.68,"location":2,"content":"a zip file like HTML file is only good"},{"from":43.68,"to":45.24,"location":2,"content":"to look at you can't do anything with it"},{"from":45.24,"to":46.92,"location":2,"content":"so you want to if you want to play with"},{"from":46.92,"to":49.53,"location":2,"content":"it by yourself download the zip file and"},{"from":49.53,"to":52.53,"location":2,"content":"get the ipython notebook out of that ok"},{"from":52.53,"to":54.45,"location":2,"content":"so we were looking at these glove word"},{"from":54.45,"to":56.19,"location":2,"content":"vectors which I'll talk about a bit more"},{"from":56.19,"to":58.44,"location":2,"content":"today and so there are these sort 
of"},{"from":58.44,"to":61.26,"location":2,"content":"basic results of similarity in this"},{"from":61.26,"to":65.24,"location":2,"content":"vector space worked very nicely for"},{"from":65.24,"to":69.87,"location":2,"content":"discovering similar words and then going"},{"from":69.87,"to":71.46,"location":2,"content":"on from that there was this idea that"},{"from":71.46,"to":73.47,"location":2,"content":"we'll spend some more time on today"},{"from":73.47,"to":77.67,"location":2,"content":"which was maybe this vector space is not"},{"from":77.67,"to":80.4,"location":2,"content":"only a similarity space we're close"},{"from":80.4,"to":83.19,"location":2,"content":"together things have similar meaning but"},{"from":83.19,"to":85.95,"location":2,"content":"it actually captures meaning and a"},{"from":85.95,"to":87.78,"location":2,"content":"considerably deeper and more profound"},{"from":87.78,"to":90.78,"location":2,"content":"way which is to say that there are"},{"from":90.78,"to":93.84,"location":2,"content":"actually directions in the space that"},{"from":93.84,"to":96.51,"location":2,"content":"you can point which have a certain"},{"from":96.51,"to":98.97,"location":2,"content":"meaning so that if you're pointing in"},{"from":98.97,"to":102.93,"location":2,"content":"one direction it means this is more so"},{"from":102.93,"to":104.85,"location":2,"content":"the case if you're pointing in a"},{"from":104.85,"to":106.71,"location":2,"content":"different direction in the meaning space"},{"from":106.71,"to":109.29,"location":2,"content":"it might be this is the capital of this"},{"from":109.29,"to":111.72,"location":2,"content":"country or all sorts of different"},{"from":111.72,"to":113.7,"location":2,"content":"meanings could be in code in the space"},{"from":113.7,"to":117.45,"location":2,"content":"and a way of testing that is to use"},{"from":117.45,"to":120.21,"location":2,"content":"these analogy problems and I 
quickly"},{"from":120.21,"to":122.58,"location":2,"content":"show this at the end but just to make"},{"from":122.58,"to":124.35,"location":2,"content":"sure everyone got it since it's sort of"},{"from":124.35,"to":127.14,"location":2,"content":"it's sort of a clever thing right so the"},{"from":127.14,"to":130.62,"location":2,"content":"idea is that we're going to start with a"},{"from":130.62,"to":135.18,"location":2,"content":"pair of words like King and man and so"},{"from":135.18,"to":136.71,"location":2,"content":"what we're going to do is we're going to"},{"from":136.71,"to":137.98,"location":2,"content":"say well there's a vector"},{"from":137.98,"to":141.46,"location":2,"content":"King in the space and there's a vector"},{"from":141.46,"to":145.24,"location":2,"content":"for man in the space and but what we're"},{"from":145.24,"to":147.52,"location":2,"content":"going to do is we're going to subtract"},{"from":147.52,"to":149.65,"location":2,"content":"as in just good old vector subtraction"},{"from":149.65,"to":151.62,"location":2,"content":"that you hopefully learned in your"},{"from":151.62,"to":154.27,"location":2,"content":"linear algebra class we're going to"},{"from":154.27,"to":156.34,"location":2,"content":"subtract the man vector from the king"},{"from":156.34,"to":158.8,"location":2,"content":"vector and the idea we have in our head"},{"from":158.8,"to":162.04,"location":2,"content":"then is if we do that what will happens"},{"from":162.04,"to":164.68,"location":2,"content":"we'll be left with the meaning of"},{"from":164.68,"to":170.26,"location":2,"content":"kingship without the madness and so then"},{"from":170.26,"to":173.71,"location":2,"content":"there's also a director - a vector for"},{"from":173.71,"to":176.68,"location":2,"content":"woman so we can add the woman vector to"},{"from":176.68,"to":178.93,"location":2,"content":"that resulting vector and then we could"},{"from":178.93,"to":181.42,"location":2,"content":"say well in the 
vector we end up at some"},{"from":181.42,"to":183.49,"location":2,"content":"point in the vector space and then we're"},{"from":183.49,"to":185.41,"location":2,"content":"going to say well what's the closest"},{"from":185.41,"to":187.99,"location":2,"content":"word that you can find to here and it's"},{"from":187.99,"to":190.03,"location":2,"content":"going to print out the closest word and"},{"from":190.03,"to":196.48,"location":2,"content":"as we saw last time lo and behold if you"},{"from":196.48,"to":200.71,"location":2,"content":"do that you get the answer and I'm"},{"from":200.71,"to":219.73,"location":2,"content":"saying you get King Nancy no wait I have"},{"from":219.73,"to":222.4,"location":2,"content":"to a first King and I sha sha sha sorry"},{"from":222.4,"to":225.64,"location":2,"content":"whoops yeah okay I kind of do it well"},{"from":225.64,"to":234.76,"location":2,"content":"like man King okay okay yeah that's"},{"from":234.76,"to":237.28,"location":2,"content":"right sorry okay yeah cuz it should be"},{"from":237.28,"to":239.71,"location":2,"content":"man as the King as woman as to something"},{"from":239.71,"to":242.47,"location":2,"content":"sorry yeah I was getting my order of"},{"from":242.47,"to":246.61,"location":2,"content":"components wrong okay and you know as I"},{"from":246.61,"to":248.77,"location":2,"content":"was sort of I guess I was showing some"},{"from":248.77,"to":251.65,"location":2,"content":"examples last time with nationality"},{"from":251.65,"to":255.7,"location":2,"content":"words but I mean this in a way that is"},{"from":255.7,"to":258.7,"location":2,"content":"sort of surprising too shocking this"},{"from":258.7,"to":261.49,"location":2,"content":"actually works for all kinds of things"},{"from":261.49,"to":263.77,"location":2,"content":"that you can get meaning in this space"},{"from":263.77,"to":267.76,"location":2,"content":"so I can ask various kinds of 
analogies"},{"from":267.76,"to":270.58,"location":2,"content":"of looser sort so I can say Australia is"},{"from":270.58,"to":271.51,"location":2,"content":"to be"},{"from":271.51,"to":275.56,"location":2,"content":"as France is to wine you might think why"},{"from":275.56,"to":276.94,"location":2,"content":"and what it gives back is champagne"},{"from":276.94,"to":279.64,"location":2,"content":"which seems a pretty good answer okay"},{"from":279.64,"to":284.02,"location":2,"content":"with that you can do more syntactic"},{"from":284.02,"to":286.93,"location":2,"content":"facts so I can say tall tall tall as the"},{"from":286.93,"to":289.99,"location":2,"content":"tallest as long as to longest and it"},{"from":289.99,"to":294.31,"location":2,"content":"gets that I say good is to fantastic as"},{"from":294.31,"to":297.76,"location":2,"content":"bad is to terrible then it seems to get"},{"from":297.76,"to":300.1,"location":2,"content":"out that there's some kind of notion of"},{"from":300.1,"to":303.76,"location":2,"content":"make more extremes direction and get"},{"from":303.76,"to":305.82,"location":2,"content":"this direction out I skipped over one"},{"from":305.82,"to":310.17,"location":2,"content":"Obama is two Clinton as Reagan is two"},{"from":310.17,"to":312.55,"location":2,"content":"you may or may not like the answer it"},{"from":312.55,"to":315.94,"location":2,"content":"gives for this one as Obama is to this"},{"from":315.94,"to":318.88,"location":2,"content":"Reagan is to Nixon now one thing you"},{"from":318.88,"to":321.4,"location":2,"content":"might notice at this point and this is"},{"from":321.4,"to":322.96,"location":2,"content":"something I actually want to come back"},{"from":322.96,"to":325.69,"location":2,"content":"to at the end well there's this problem"},{"from":325.69,"to":328.03,"location":2,"content":"because Clinton's ambiguous right"},{"from":328.03,"to":331.66,"location":2,"content":"there's Bill or live Hillary and 
I"},{"from":331.66,"to":335.68,"location":2,"content":"forget you know so this data as I said"},{"from":335.68,"to":337.78,"location":2,"content":"is a few years old so this data was done"},{"from":337.78,"to":341.98,"location":2,"content":"in 2014 so in sort of in it definitely"},{"from":341.98,"to":343.39,"location":2,"content":"doesn't have Trump really in it as a"},{"from":343.39,"to":345.64,"location":2,"content":"politician but you know it would have"},{"from":345.64,"to":348.4,"location":2,"content":"variously both Clinton's but as sort of"},{"from":348.4,"to":351.43,"location":2,"content":"make sense of probably for sort of proof"},{"from":351.43,"to":354.37,"location":2,"content":"for 2014 data that Bill Clinton"},{"from":354.37,"to":356.8,"location":2,"content":"dominated so I think what we're getting"},{"from":356.8,"to":360.97,"location":2,"content":"out of this is that Clinton and Nixon"},{"from":360.97,"to":362.92,"location":2,"content":"are sort of similar of people in dangers"},{"from":362.92,"to":368.53,"location":2,"content":"of being impeached and on both sides of"},{"from":368.53,"to":370.75,"location":2,"content":"the aisle and of thinking primarily of"},{"from":370.75,"to":373.57,"location":2,"content":"Bill Clinton but if this sort of brings"},{"from":373.57,"to":375.37,"location":2,"content":"up something that I'll come back to"},{"from":375.37,"to":378.34,"location":2,"content":"right at the end of it sort of looks"},{"from":378.34,"to":380.16,"location":2,"content":"like we've got a sort of a problem here"},{"from":380.16,"to":382.69,"location":2,"content":"because we just have this string"},{"from":382.69,"to":387.06,"location":2,"content":"literally Clinton and that string is any"},{"from":387.06,"to":391.36,"location":2,"content":"possible sense and meaning of the string"},{"from":391.36,"to":396.58,"location":2,"content":"Clinton and so minimally that we have"},{"from":396.58,"to":398.71,"location":2,"content":"Bill Clinton and Hillary 
Clinton but you"},{"from":398.71,"to":399.88,"location":2,"content":"know maybe you have some friends there"},{"from":399.88,"to":401.59,"location":2,"content":"called Clinton as well right and they're"},{"from":401.59,"to":404.11,"location":2,"content":"all mixed together in this Clinton and"},{"from":404.11,"to":405.1,"location":2,"content":"so that seems kind"},{"from":405.1,"to":406.9,"location":2,"content":"problematic and that sort of been an"},{"from":406.9,"to":408.37,"location":2,"content":"issue that's been discussed some for"},{"from":408.37,"to":410.2,"location":2,"content":"these word vectors and I'll come back to"},{"from":410.2,"to":413.56,"location":2,"content":"that another thing you can do is you can"},{"from":413.56,"to":415.9,"location":2,"content":"give a set of words and say which is the"},{"from":415.9,"to":417.94,"location":2,"content":"odd one out may be used to do puzzles"},{"from":417.94,"to":419.82,"location":2,"content":"like that in middle school or something"},{"from":419.82,"to":422.5,"location":2,"content":"and so you can do that and it decides"},{"from":422.5,"to":424.3,"location":2,"content":"that cereal is the outline out of that"},{"from":424.3,"to":427.42,"location":2,"content":"set mm-hmm seems okay and then one other"},{"from":427.42,"to":430.9,"location":2,"content":"thing I'll just show you is so it sort"},{"from":430.9,"to":432.55,"location":2,"content":"of be nice to look at these words as"},{"from":432.55,"to":435.28,"location":2,"content":"I've drawn them in some of the slide"},{"from":435.28,"to":437.47,"location":2,"content":"pictures so this is saying to put"},{"from":437.47,"to":439.78,"location":2,"content":"together a PCEHR principal components"},{"from":439.78,"to":444.58,"location":2,"content":"analysis scatter plot so I can do that"},{"from":444.58,"to":447.34,"location":2,"content":"and then I can say give it a set of"},{"from":447.34,"to":451.06,"location":2,"content":"words and draw me these as a 
scatter"},{"from":451.06,"to":455.32,"location":2,"content":"plot and hopefully if I can just about"},{"from":455.32,"to":458.83,"location":2,"content":"fit it in here is my scatter plot and it"},{"from":458.83,"to":460.42,"location":2,"content":"works pretty well right I've got the"},{"from":460.42,"to":462.73,"location":2,"content":"wine champagne beer up here then the"},{"from":462.73,"to":465.58,"location":2,"content":"coffee and tea here are the country easy"},{"from":465.58,"to":467.44,"location":2,"content":"as the school's college institute"},{"from":467.44,"to":473.49,"location":2,"content":"universities the animals are down here"},{"from":473.49,"to":476.8,"location":2,"content":"food stuffs there so yeah this sort of"},{"from":476.8,"to":478.42,"location":2,"content":"really does work with this tube"},{"from":478.42,"to":480.4,"location":2,"content":"Direction dimensional display it"},{"from":480.4,"to":484.6,"location":2,"content":"basically shows you similarity now there"},{"from":484.6,"to":487.81,"location":2,"content":"are you know to some extent though you"},{"from":487.81,"to":489.43,"location":2,"content":"want to hold on to your wallet with"},{"from":489.43,"to":491.59,"location":2,"content":"these PCA displays as I've discussed"},{"from":491.59,"to":493.78,"location":2,"content":"before since you are taking something"},{"from":493.78,"to":495.7,"location":2,"content":"that was a hundred dimensional and we're"},{"from":495.7,"to":497.89,"location":2,"content":"just doing this 2d projection there's"},{"from":497.89,"to":500.62,"location":2,"content":"capturing some of the major geometry of"},{"from":500.62,"to":503.53,"location":2,"content":"the space but it just has to be losing a"},{"from":503.53,"to":505.69,"location":2,"content":"huge amount of the information so when"},{"from":505.69,"to":508.48,"location":2,"content":"things end up close together they might"},{"from":508.48,"to":510.19,"location":2,"content":"be really close together in the 
original"},{"from":510.19,"to":512.32,"location":2,"content":"space or they might just have been words"},{"from":512.32,"to":515.5,"location":2,"content":"that lost in the 2d projection because"},{"from":515.5,"to":517.93,"location":2,"content":"there are other patterns that were more"},{"from":517.93,"to":520.03,"location":2,"content":"dominant and were chosen was the first"},{"from":520.03,"to":522.25,"location":2,"content":"two principal components so you sort of"},{"from":522.25,"to":524.53,"location":2,"content":"don't want to over trust these things"},{"from":524.53,"to":527.02,"location":2,"content":"and something if you like in phobias you"},{"from":527.02,"to":528.79,"location":2,"content":"might think about is how there are other"},{"from":528.79,"to":530.95,"location":2,"content":"ways that I might be able to represent"},{"from":530.95,"to":532.69,"location":2,"content":"the distances in a way that was more"},{"from":532.69,"to":535.75,"location":2,"content":"accurate anyway this is very simple to"},{"from":535.75,"to":537.85,"location":2,"content":"do right I'm just getting a PCA to"},{"from":537.85,"to":538.81,"location":2,"content":"reduce that"},{"from":538.81,"to":540.78,"location":2,"content":"analogy of the matrix and then"},{"from":540.78,"to":542.35,"location":2,"content":"transforming with it"},{"from":542.35,"to":544.71,"location":2,"content":"these word vectors and printing them"},{"from":544.71,"to":547.9,"location":2,"content":"it's mainly easy to do the bit that"},{"from":547.9,"to":550.48,"location":2,"content":"wasn't easy for me to do but if"},{"from":550.48,"to":552.57,"location":2,"content":"someone's got some clever Python"},{"from":552.57,"to":555.16,"location":2,"content":"plotting tips I'd like one if someone"},{"from":555.16,"to":557.26,"location":2,"content":"wants to send me a message after class I"},{"from":557.26,"to":558.76,"location":2,"content":"would have thought there'd be 
some"},{"from":558.76,"to":560.95,"location":2,"content":"default way in which you could just"},{"from":560.95,"to":563.62,"location":2,"content":"label points in a scatter plot but I"},{"from":563.62,"to":566.46,"location":2,"content":"wasn't able to find one so what I did"},{"from":566.46,"to":569.2,"location":2,"content":"was I'm just sort of plotting the text"},{"from":569.2,"to":570.76,"location":2,"content":"and I'm offsetting it a little bit from"},{"from":570.76,"to":573.13,"location":2,"content":"the points now that works kind of"},{"from":573.13,"to":574.72,"location":2,"content":"crapoly because they just collide with"},{"from":574.72,"to":577.21,"location":2,"content":"each other as you can see so it'd be"},{"from":577.21,"to":578.56,"location":2,"content":"better if there was a better way to do"},{"from":578.56,"to":581.56,"location":2,"content":"point labeling in Python plots so if"},{"from":581.56,"to":583.6,"location":2,"content":"anyone knows the answer to that one you"},{"from":583.6,"to":589.71,"location":2,"content":"can send it to me okay so that's that"},{"from":589.71,"to":591.73,"location":2,"content":"and if you haven't used the ipython"},{"from":591.73,"to":593.77,"location":2,"content":"notebooks before and you don't want your"},{"from":593.77,"to":595.99,"location":2,"content":"computer to run really slowly it's a"},{"from":595.99,"to":598.09,"location":2,"content":"good idea to halt your ipython notebooks"},{"from":598.09,"to":599.38,"location":2,"content":"when you're not going to be using them"},{"from":599.38,"to":601.66,"location":2,"content":"anymore especially if they're computing"},{"from":601.66,"to":625.73,"location":2,"content":"something okay"},{"from":625.73,"to":632.07,"location":2,"content":"okay so now lecture 2 and so for today"},{"from":632.07,"to":633.48,"location":2,"content":"we're going to keep on talking about"},{"from":633.48,"to":635.79,"location":2,"content":"things you can do with wood vectors 
and"},{"from":635.79,"to":638.07,"location":2,"content":"say a little bit at the end about word"},{"from":638.07,"to":641.97,"location":2,"content":"sensors so in more detail I'm gonna say"},{"from":641.97,"to":646.05,"location":2,"content":"a bit more about word to vac I'm going"},{"from":646.05,"to":648.03,"location":2,"content":"to have a sort of a very brief excursion"},{"from":648.03,"to":651.6,"location":2,"content":"on optimization but then I sort of want"},{"from":651.6,"to":654.39,"location":2,"content":"to explain a bit more of the space of"},{"from":654.39,"to":658.14,"location":2,"content":"what people have done and can do with"},{"from":658.14,"to":661.05,"location":2,"content":"dense word representations so I'm going"},{"from":661.05,"to":663.09,"location":2,"content":"to say something about count based"},{"from":663.09,"to":666.06,"location":2,"content":"approaches to capturing meaning and how"},{"from":666.06,"to":668.22,"location":2,"content":"do they work I'm going to talk to a bit"},{"from":668.22,"to":668.85,"location":2,"content":"about it"},{"from":668.85,"to":670.68,"location":2,"content":"a different model of word vectors which"},{"from":670.68,"to":675.27,"location":2,"content":"was the glove model that as a postdoc of"},{"from":675.27,"to":679.11,"location":2,"content":"mine Jeffery Pennington and me worked on"},{"from":679.11,"to":681.66,"location":2,"content":"a couple of years ago talk some about"},{"from":681.66,"to":683.85,"location":2,"content":"evaluation really quite dominant theme"},{"from":683.85,"to":685.77,"location":2,"content":"on a lot of what we do are natural"},{"from":685.77,"to":688.17,"location":2,"content":"language processing is how do we how do"},{"from":688.17,"to":690.21,"location":2,"content":"we evaluate things and how much do we"},{"from":690.21,"to":692.79,"location":2,"content":"trust our evaluations and then say a"},{"from":692.79,"to":695.46,"location":2,"content":"little bit about word sensors I have 
a"},{"from":695.46,"to":697.38,"location":2,"content":"sort of a goal here which is that by the"},{"from":697.38,"to":699.69,"location":2,"content":"end of the class you should actually"},{"from":699.69,"to":702.75,"location":2,"content":"sort of understand enough of the lay of"},{"from":702.75,"to":704.94,"location":2,"content":"the land that you could read papers"},{"from":704.94,"to":706.95,"location":2,"content":"about word vectors such as the ones that"},{"from":706.95,"to":708.66,"location":2,"content":"are in the syllabus and actually"},{"from":708.66,"to":710.31,"location":2,"content":"understand them and where they're coming"},{"from":710.31,"to":712.89,"location":2,"content":"from and roughly how they work and so"},{"from":712.89,"to":714.03,"location":2,"content":"you know if you really want to minimize"},{"from":714.03,"to":716.46,"location":2,"content":"work for York this class you could think"},{"from":716.46,"to":718.14,"location":2,"content":"I know everything I need to know after"},{"from":718.14,"to":719.91,"location":2,"content":"the first week and I'm going to do a"},{"from":719.91,"to":722.19,"location":2,"content":"final project on word vectors and I'll"},{"from":722.19,"to":724.83,"location":2,"content":"be ok and you know you could actually do"},{"from":724.83,"to":728.07,"location":2,"content":"that I'll mention during the class a"},{"from":728.07,"to":730.68,"location":2,"content":"couple of recent pieces of work on word"},{"from":730.68,"to":733.56,"location":2,"content":"vectors on the other hand doing things"},{"from":733.56,"to":735.78,"location":2,"content":"with word vectors is a fairly mined out"},{"from":735.78,"to":738.18,"location":2,"content":"area so you're probably better off I'm"},{"from":738.18,"to":739.53,"location":2,"content":"also listening to some of the later"},{"from":739.53,"to":743.16,"location":2,"content":"parts of the class ok so remember we had"},{"from":743.16,"to":745.38,"location":2,"content":"this idea of 
word to vac so it was an"},{"from":745.38,"to":748.74,"location":2,"content":"iterative updating algorithm that"},{"from":748.74,"to":751.89,"location":2,"content":"learned these vector representations of"},{"from":751.89,"to":753.84,"location":2,"content":"words that in some sense capture their"},{"from":753.84,"to":755.94,"location":2,"content":"meaning and the way it worked was we"},{"from":755.94,"to":757.65,"location":2,"content":"kind of moved position by position"},{"from":757.65,"to":759.24,"location":2,"content":"through a corpus"},{"from":759.24,"to":761.67,"location":2,"content":"each point in time we had a center word"},{"from":761.67,"to":765.18,"location":2,"content":"here into and it's trying to predict the"},{"from":765.18,"to":767.4,"location":2,"content":"words around that by having a"},{"from":767.4,"to":769.83,"location":2,"content":"probability distribution over words will"},{"from":769.83,"to":771.84,"location":2,"content":"occur around that and that probability"},{"from":771.84,"to":774.66,"location":2,"content":"distribution is defined simply in terms"},{"from":774.66,"to":777.93,"location":2,"content":"of the dot product of the word vectors"},{"from":777.93,"to":781.08,"location":2,"content":"via the softmax function and so what we"},{"from":781.08,"to":783.66,"location":2,"content":"want to do is change those vectors in a"},{"from":783.66,"to":785.85,"location":2,"content":"way that this gives good probability"},{"from":785.85,"to":788.76,"location":2,"content":"predictions it gives as high probability"},{"from":788.76,"to":790.98,"location":2,"content":"as possible to words that you tend to"},{"from":790.98,"to":793.8,"location":2,"content":"see in the context and so just to drill"},{"from":793.8,"to":795.96,"location":2,"content":"that in a little bit more you know what"},{"from":795.96,"to":800.13,"location":2,"content":"we actually have is we have two matrices"},{"from":800.13,"to":803.88,"location":2,"content":"right we have four 
Center words we have"},{"from":803.88,"to":805.86,"location":2,"content":"a matrix where for each word now"},{"from":805.86,"to":809.13,"location":2,"content":"vocabulary we have a vector and that"},{"from":809.13,"to":810.99,"location":2,"content":"this this is probably as good a point as"},{"from":810.99,"to":813.57,"location":2,"content":"any to say that it turns out that all"},{"from":813.57,"to":815.7,"location":2,"content":"the major deep learning packages"},{"from":815.7,"to":819.12,"location":2,"content":"tensorflow pi torch etc for their word"},{"from":819.12,"to":821.82,"location":2,"content":"vectors the word vectors are represented"},{"from":821.82,"to":824.16,"location":2,"content":"as rows if you've done a bunch of math"},{"from":824.16,"to":826.26,"location":2,"content":"classes that might not be what you would"},{"from":826.26,"to":827.97,"location":2,"content":"expect you might have expected the other"},{"from":827.97,"to":830.16,"location":2,"content":"way around but they all put them in rows"},{"from":830.16,"to":834.03,"location":2,"content":"so we can have rows for our so we have"},{"from":834.03,"to":836.25,"location":2,"content":"six words and a five dimensional vector"},{"from":836.25,"to":839.72,"location":2,"content":"each okay and then we have this outside"},{"from":839.72,"to":842.31,"location":2,"content":"matrix where we also have a second"},{"from":842.31,"to":844.89,"location":2,"content":"vector for each word which is this"},{"from":844.89,"to":849.3,"location":2,"content":"representation in context so when we"},{"from":849.3,"to":851.58,"location":2,"content":"have a particular Center word here word"},{"from":851.58,"to":853.62,"location":2,"content":"for you know when we're doing our"},{"from":853.62,"to":856.29,"location":2,"content":"computations we're taking a dot product"},{"from":856.29,"to":861.03,"location":2,"content":"between v4 and each row of U and that's"},{"from":861.03,"to":864.51,"location":2,"content":"then giving 
us a vector of dot products"},{"from":864.51,"to":867.45,"location":2,"content":"scores and so then after that we're"},{"from":867.45,"to":869.55,"location":2,"content":"running soft maxes on each of those"},{"from":869.55,"to":872.61,"location":2,"content":"numbers doing it element wise and that's"},{"from":872.61,"to":873.69,"location":2,"content":"then giving us a probability"},{"from":873.69,"to":876.78,"location":2,"content":"distribution over words in the context"},{"from":876.78,"to":879.84,"location":2,"content":"and there's sort of things to notice"},{"from":879.84,"to":882.96,"location":2,"content":"there which hopefully you noticed last"},{"from":882.96,"to":884.76,"location":2,"content":"time but to make sure you notice that"},{"from":884.76,"to":887.94,"location":2,"content":"you know we've just got one probability"},{"from":887.94,"to":890.25,"location":2,"content":"distribution right so in terms of what"},{"from":890.25,"to":891.95,"location":2,"content":"words we predict we're pretty"},{"from":891.95,"to":893.72,"location":2,"content":"acting exactly the same probability"},{"from":893.72,"to":896.3,"location":2,"content":"distribution every position we've sort"},{"from":896.3,"to":898.04,"location":2,"content":"of saying the most likely word one to"},{"from":898.04,"to":900.68,"location":2,"content":"the left is whatever it is house the"},{"from":900.68,"to":902.27,"location":2,"content":"most likely word two to the left is"},{"from":902.27,"to":904.67,"location":2,"content":"house three de left is house one of the"},{"from":904.67,"to":907.01,"location":2,"content":"right should be house too right so it's"},{"from":907.01,"to":908.81,"location":2,"content":"sort of know sort of fineness of"},{"from":908.81,"to":911.17,"location":2,"content":"prediction it's just an overall kind of"},{"from":911.17,"to":913.73,"location":2,"content":"probability distribution of words that"},{"from":913.73,"to":916.49,"location":2,"content":"are likely to occur in my 
context so all"},{"from":916.49,"to":919.46,"location":2,"content":"we're asking for is a model that gives"},{"from":919.46,"to":922.07,"location":2,"content":"reasonably high probability estimates to"},{"from":922.07,"to":925.64,"location":2,"content":"all words that occur in the context of"},{"from":925.64,"to":928.37,"location":2,"content":"this word relatively often there's"},{"from":928.37,"to":930.32,"location":2,"content":"nothing more to it than that and that's"},{"from":930.32,"to":932.06,"location":2,"content":"part of why it's sort of surprising when"},{"from":932.06,"to":934.4,"location":2,"content":"you've got such a simplistic thing that"},{"from":934.4,"to":936.35,"location":2,"content":"it seems like at the end of the day it"},{"from":936.35,"to":939.14,"location":2,"content":"can end up capturing so much about the"},{"from":939.14,"to":941.51,"location":2,"content":"meanings of words and aspects of the"},{"from":941.51,"to":943.94,"location":2,"content":"meanings of words like in the examples I"},{"from":943.94,"to":945.73,"location":2,"content":"was just showing you in the ipython"},{"from":945.73,"to":948.02,"location":2,"content":"notebook"},{"from":948.02,"to":951.98,"location":2,"content":"and there's one other thing that I was"},{"from":951.98,"to":954.14,"location":2,"content":"gonna say oh yeah one other thing I was"},{"from":954.14,"to":956.24,"location":2,"content":"going to say was the other thing that"},{"from":956.24,"to":959.96,"location":2,"content":"might occur to you from this is well"},{"from":959.96,"to":962.36,"location":2,"content":"wait a minute there was like that and"},{"from":962.36,"to":966.8,"location":2,"content":"and and of that occur all the time so"},{"from":966.8,"to":971.42,"location":2,"content":"that means every word must have a high"},{"from":971.42,"to":975.11,"location":2,"content":"dot product with words like that an oven"},{"from":975.11,"to":978.53,"location":2,"content":"and to get their probabilities right 
and"},{"from":978.53,"to":982.22,"location":2,"content":"the first answer to that is yup that's"},{"from":982.22,"to":984.59,"location":2,"content":"true and it turns out that all word"},{"from":984.59,"to":988.4,"location":2,"content":"vectors have a very strong word"},{"from":988.4,"to":990.59,"location":2,"content":"probability component that reflects that"},{"from":990.59,"to":993.8,"location":2,"content":"and I mean one of the things that some"},{"from":993.8,"to":996.83,"location":2,"content":"workers discussed so on the readings"},{"from":996.83,"to":998.72,"location":2,"content":"there are two papers from Sanjeev"},{"from":998.72,"to":1001.09,"location":2,"content":"Aurora's group in Princeton and one of"},{"from":1001.09,"to":1004.39,"location":2,"content":"those papers sort of discusses this"},{"from":1004.39,"to":1007.15,"location":2,"content":"probability high frequency effect and"},{"from":1007.15,"to":1010,"location":2,"content":"you know a crude way of actually fixing"},{"from":1010,"to":1012.19,"location":2,"content":"this high frequency effect is that"},{"from":1012.19,"to":1017.14,"location":2,"content":"normally the first the first biggest"},{"from":1017.14,"to":1019.39,"location":2,"content":"component in your word vectors is"},{"from":1019.39,"to":1021.46,"location":2,"content":"actually a frequency effect and if you"},{"from":1021.46,"to":1022.93,"location":2,"content":"just lop it off you can make your"},{"from":1022.93,"to":1025.76,"location":2,"content":"semantic similarities better"},{"from":1025.76,"to":1027.83,"location":2,"content":"but there are other things that we do to"},{"from":1027.83,"to":1030.41,"location":2,"content":"sort of deal with high frequencies okay"},{"from":1030.41,"to":1032.84,"location":2,"content":"so we get these lovely spacers that I've"},{"from":1032.84,"to":1035.54,"location":2,"content":"shown some of but I'll make one more"},{"from":1035.54,"to":1040.76,"location":2,"content":"remark yeah so did I say this 
last time"},{"from":1040.76,"to":1046.52,"location":2,"content":"oh my remark anyway is that we show all"},{"from":1046.52,"to":1048.8,"location":2,"content":"these two-dimensional pictures they're"},{"from":1048.8,"to":1051.56,"location":2,"content":"exceedingly exceedingly misleading"},{"from":1051.56,"to":1054.62,"location":2,"content":"because in these pick two-dimensional"},{"from":1054.62,"to":1057.71,"location":2,"content":"pictures you know you have these effects"},{"from":1057.71,"to":1061.25,"location":2,"content":"that if you know Samsung is close to"},{"from":1061.25,"to":1064.19,"location":2,"content":"Nokia it has to be over here and then it"},{"from":1064.19,"to":1066.44,"location":2,"content":"has to be far away from words that are"},{"from":1066.44,"to":1069.5,"location":2,"content":"over here whereas you might sort of also"},{"from":1069.5,"to":1071.48,"location":2,"content":"want to have the effect that Nokia is"},{"from":1071.48,"to":1073.85,"location":2,"content":"close to Finland for a different reason"},{"from":1073.85,"to":1076.81,"location":2,"content":"and you can't do that in two dimensional"},{"from":1076.81,"to":1080.17,"location":2,"content":"vector spaces but you know one of the"},{"from":1080.17,"to":1082.4,"location":2,"content":"most of the properties of high"},{"from":1082.4,"to":1084.08,"location":2,"content":"dimensional vector spaces are very"},{"from":1084.08,"to":1086.48,"location":2,"content":"unintuitive and one of the ways that"},{"from":1086.48,"to":1088.22,"location":2,"content":"they're unintuitive is in a high"},{"from":1088.22,"to":1090.53,"location":2,"content":"dimensional vector space a word can be"},{"from":1090.53,"to":1093.41,"location":2,"content":"close to lots of other words in"},{"from":1093.41,"to":1099.14,"location":2,"content":"different directions okay so we sort of"},{"from":1099.14,"to":1102.34,"location":2,"content":"started to talk about how we went 
about"},{"from":1102.34,"to":1105.56,"location":2,"content":"learning these word vectors I'm sort of"},{"from":1105.56,"to":1109.52,"location":2,"content":"going to take about a five minute detour"},{"from":1109.52,"to":1112.4,"location":2,"content":"into optimization now this isn't really"},{"from":1112.4,"to":1114.62,"location":2,"content":"an optimization class if you want to"},{"from":1114.62,"to":1116.54,"location":2,"content":"learn a lot about optimization well you"},{"from":1116.54,"to":1118.67,"location":2,"content":"can learn more about optimization if you"},{"from":1118.67,"to":1120.74,"location":2,"content":"do 229 and if you do something like"},{"from":1120.74,"to":1123.05,"location":2,"content":"Stephen Boyd's optimization class you"},{"from":1123.05,"to":1125.63,"location":2,"content":"can learn a lot of optimization but this"},{"from":1125.63,"to":1127.58,"location":2,"content":"is so a really baby optimization but"},{"from":1127.58,"to":1129.02,"location":2,"content":"just to make sure it runs on the same"},{"from":1129.02,"to":1132.8,"location":2,"content":"page here are three slides right so what"},{"from":1132.8,"to":1135.17,"location":2,"content":"we did at the end what we did over there"},{"from":1135.17,"to":1137.6,"location":2,"content":"where I apologize that my writing was"},{"from":1137.6,"to":1140.6,"location":2,"content":"too small but that will give you the"},{"from":1140.6,"to":1142.91,"location":2,"content":"chance to when doing homework two and"},{"from":1142.91,"to":1144.92,"location":2,"content":"you have to write that out to work it"},{"from":1144.92,"to":1147.35,"location":2,"content":"out for yourself and learn more in the"},{"from":1147.35,"to":1150.68,"location":2,"content":"process right so what we had was a cost"},{"from":1150.68,"to":1152.75,"location":2,"content":"function when we wanted to minimize and"},{"from":1152.75,"to":1155.33,"location":2,"content":"so what we did was we did a bit 
of"},{"from":1155.33,"to":1158.06,"location":2,"content":"calculus to count calculate the gradient"},{"from":1158.06,"to":1159.35,"location":2,"content":"of the cost function"},{"from":1159.35,"to":1162.47,"location":2,"content":"with respect to our word vectors which"},{"from":1162.47,"to":1165.17,"location":2,"content":"were our variables theta and then what"},{"from":1165.17,"to":1168.11,"location":2,"content":"we want to do is say well if we take a"},{"from":1168.11,"to":1171.53,"location":2,"content":"small step in the direction of the"},{"from":1171.53,"to":1173.06,"location":2,"content":"negative of the gradient"},{"from":1173.06,"to":1176.36,"location":2,"content":"that'll be taking us down down hill in"},{"from":1176.36,"to":1178.58,"location":2,"content":"this space and we want to keep on doing"},{"from":1178.58,"to":1181.43,"location":2,"content":"that and sort of head to the minimum of"},{"from":1181.43,"to":1183.74,"location":2,"content":"our space I mean of course in our high"},{"from":1183.74,"to":1185.93,"location":2,"content":"multi-dimensional space you know it"},{"from":1185.93,"to":1187.82,"location":2,"content":"might not be a nice smooth curve like"},{"from":1187.82,"to":1189.77,"location":2,"content":"this it might be a horrible and non"},{"from":1189.77,"to":1193.19,"location":2,"content":"convex curve but that's just the idea so"},{"from":1193.19,"to":1194.75,"location":2,"content":"essentially we're saying we've got the"},{"from":1194.75,"to":1198.47,"location":2,"content":"old parameters we work out the gradient"},{"from":1198.47,"to":1200.12,"location":2,"content":"of the objective function using those"},{"from":1200.12,"to":1202.85,"location":2,"content":"old parameters we multiply that by a"},{"from":1202.85,"to":1206.9,"location":2,"content":"small alpha which is our step size or"},{"from":1206.9,"to":1208.34,"location":2,"content":"learning rate because we only want to"},{"from":1208.34,"to":1210.89,"location":2,"content":"move a 
little bit each time because if"},{"from":1210.89,"to":1213.86,"location":2,"content":"back here if we sort of said downhill is"},{"from":1213.86,"to":1216.14,"location":2,"content":"this way and said great let's go a long"},{"from":1216.14,"to":1218.03,"location":2,"content":"way that way you could kind of complete"},{"from":1218.03,"to":1219.62,"location":2,"content":"the overshoot so we only want to go a"},{"from":1219.62,"to":1222.44,"location":2,"content":"little bit each time so we normally have"},{"from":1222.44,"to":1224.87,"location":2,"content":"a small learning rate alpha and so we"},{"from":1224.87,"to":1227.21,"location":2,"content":"subtract a small multiple of the"},{"from":1227.21,"to":1230,"location":2,"content":"gradient and we from the old parameters"},{"from":1230,"to":1232.91,"location":2,"content":"and we get our new parameters and that's"},{"from":1232.91,"to":1234.44,"location":2,"content":"sort of effectively being worked out"},{"from":1234.44,"to":1237.32,"location":2,"content":"component wise as is shown below that"},{"from":1237.32,"to":1238.7,"location":2,"content":"we're just doing that for each of the"},{"from":1238.7,"to":1241.4,"location":2,"content":"partial derivatives and then that our"},{"from":1241.4,"to":1243.35,"location":2,"content":"hope is that that will let us gradually"},{"from":1243.35,"to":1246.44,"location":2,"content":"walk down this surface now if you"},{"from":1246.44,"to":1248.72,"location":2,"content":"actually did this it would be"},{"from":1248.72,"to":1251.27,"location":2,"content":"unbelievably bad for the kind of systems"},{"from":1251.27,"to":1253.79,"location":2,"content":"that we built and there's a lot of work"},{"from":1253.79,"to":1256.28,"location":2,"content":"on clever optimization but the most"},{"from":1256.28,"to":1259.22,"location":2,"content":"basic thing which you definitely need to"},{"from":1259.22,"to":1263.93,"location":2,"content":"know is that well our objective 
function"},{"from":1263.93,"to":1267.62,"location":2,"content":"here J of theta was a function of our"},{"from":1267.62,"to":1270.26,"location":2,"content":"entire corpus right and to get this to"},{"from":1270.26,"to":1272.24,"location":2,"content":"work well the first thing you want to do"},{"from":1272.24,"to":1274.79,"location":2,"content":"is you know collect a few billion words"},{"from":1274.79,"to":1277.43,"location":2,"content":"of your favorite language and then say"},{"from":1277.43,"to":1279.65,"location":2,"content":"go and build a word2vec model for me"},{"from":1279.65,"to":1284.9,"location":2,"content":"and so if you have to evaluate a billion"},{"from":1284.9,"to":1288.29,"location":2,"content":"Center words and maybe then for each of"},{"from":1288.29,"to":1290.42,"location":2,"content":"ten billion context words if you have"},{"from":1290.42,"to":1292.7,"location":2,"content":"the window size of five and"},{"from":1292.7,"to":1294.68,"location":2,"content":"so you have to do these sort of 10"},{"from":1294.68,"to":1298.61,"location":2,"content":"billion softmax calculations before you"},{"from":1298.61,"to":1300.98,"location":2,"content":"work out what your gradient is that"},{"from":1300.98,"to":1302.18,"location":2,"content":"you're going to be having your computer"},{"from":1302.18,"to":1305.15,"location":2,"content":"compute for quite a long time before"},{"from":1305.15,"to":1307.22,"location":2,"content":"you make one little step in the gradient"},{"from":1307.22,"to":1309.14,"location":2,"content":"and so things are going to go so so"},{"from":1309.14,"to":1312.2,"location":2,"content":"slowly so no one does that in deep"},{"from":1312.2,"to":1315.17,"location":2,"content":"learning systems so what people everyone"},{"from":1315.17,"to":1317.78,"location":2,"content":"does is use stochastic gradient descent"},{"from":1317.78,"to":1320.9,"location":2,"content":"and in stochastic gradient descent 
we"},{"from":1320.9,"to":1324.73,"location":2,"content":"sample our window in the simplest case"},{"from":1324.73,"to":1329.75,"location":2,"content":"we just for this one window work out an"},{"from":1329.75,"to":1332.24,"location":2,"content":"estimate of the gradient and we use it"},{"from":1332.24,"to":1334.85,"location":2,"content":"as a parameter update so this is sort of"},{"from":1334.85,"to":1338.96,"location":2,"content":"an amazingly amazingly noisy estimate of"},{"from":1338.96,"to":1341.45,"location":2,"content":"the gradient but it sort of doesn't"},{"from":1341.45,"to":1343.19,"location":2,"content":"matter too much because as soon as we've"},{"from":1343.19,"to":1344.57,"location":2,"content":"done it we're gonna choose a different"},{"from":1344.57,"to":1346.97,"location":2,"content":"Center word and do it again and again so"},{"from":1346.97,"to":1348.98,"location":2,"content":"that gradually we sort of approach what"},{"from":1348.98,"to":1350.9,"location":2,"content":"we would have gotten if we'd sort of"},{"from":1350.9,"to":1353.42,"location":2,"content":"looked at all of the Center words before"},{"from":1353.42,"to":1355.91,"location":2,"content":"we took any steps but because we take"},{"from":1355.91,"to":1359.09,"location":2,"content":"steps as we go we get to the minimum of"},{"from":1359.09,"to":1361.85,"location":2,"content":"the function orders of magnitude more"},{"from":1361.85,"to":1366.8,"location":2,"content":"quickly so this shows the simplest case"},{"from":1366.8,"to":1368.99,"location":2,"content":"where we just sampling one window in"},{"from":1368.99,"to":1371.57,"location":2,"content":"practice that's not what we normally do"},{"from":1371.57,"to":1375.98,"location":2,"content":"we normally sample us a small bunch you"},{"from":1375.98,"to":1380.3,"location":2,"content":"know order approximately 32 or 64 so if"},{"from":1380.3,"to":1383.03,"location":2,"content":"we have a sample that's 
bigger"},{"from":1383.03,"to":1384.59,"location":2,"content":"that's generally referred to as a mini"},{"from":1384.59,"to":1387.2,"location":2,"content":"batch and we calculate a gradient"},{"from":1387.2,"to":1390.56,"location":2,"content":"estimate from the mini batch so that has"},{"from":1390.56,"to":1394.16,"location":2,"content":"two advantages one advantage is that you"},{"from":1394.16,"to":1396.62,"location":2,"content":"kind of get less noisy estimates of the"},{"from":1396.62,"to":1398.45,"location":2,"content":"gradient because you've kind of averaged"},{"from":1398.45,"to":1400.64,"location":2,"content":"over a bunch of examples rather than"},{"from":1400.64,"to":1403.46,"location":2,"content":"just using one but the second advantage"},{"from":1403.46,"to":1405.95,"location":2,"content":"which is the one way we really care is"},{"from":1405.95,"to":1408.35,"location":2,"content":"if we want our computations to go fast"},{"from":1408.35,"to":1412.64,"location":2,"content":"when we're using a GPU that you need to"},{"from":1412.64,"to":1414.74,"location":2,"content":"get parallelization of doing the same"},{"from":1414.74,"to":1416.84,"location":2,"content":"operation a whole bunch of times and"},{"from":1416.84,"to":1419.21,"location":2,"content":"then you gain a lot by using a mini"},{"from":1419.21,"to":1421.55,"location":2,"content":"batch of 64 examples or something like"},{"from":1421.55,"to":1424.4,"location":2,"content":"that and you don't have to but you know"},{"from":1424.4,"to":1426.29,"location":2,"content":"it turns out the details of the guts of"},{"from":1426.29,"to":1426.63,"location":2,"content":"the"},{"from":1426.63,"to":1428.37,"location":2,"content":"I'd wear that you know there's nvidia"},{"from":1428.37,"to":1431.07,"location":2,"content":"gpus you know have these whatever they"},{"from":1431.07,"to":1432.84,"location":2,"content":"have is inside them their own powers of"},{"from":1432.84,"to":1435.24,"location":2,"content":"two 
so you get better speed ups if you"},{"from":1435.24,"to":1438.39,"location":2,"content":"use batches like 32 or 64 rather than"},{"from":1438.39,"to":1440.25,"location":2,"content":"just deciding that 42 is still your"},{"from":1440.25,"to":1441.84,"location":2,"content":"favorite number from high school and"},{"from":1441.84,"to":1444.18,"location":2,"content":"you're gonna use that as the size of"},{"from":1444.18,"to":1450.87,"location":2,"content":"your mini batch okay yeah here's one"},{"from":1450.87,"to":1453.87,"location":2,"content":"other interesting thing which actually"},{"from":1453.87,"to":1456.21,"location":2,"content":"has some optimization details in it it"},{"from":1456.21,"to":1459.93,"location":2,"content":"turns out if you think of these doing"},{"from":1459.93,"to":1462.45,"location":2,"content":"stochastic gradients with word vectors"},{"from":1462.45,"to":1464.7,"location":2,"content":"it's actually very different to some"},{"from":1464.7,"to":1466.59,"location":2,"content":"other deep learning problems like vision"},{"from":1466.59,"to":1469.35,"location":2,"content":"deep learning problems because for"},{"from":1469.35,"to":1471.66,"location":2,"content":"either a single window or even a sort of"},{"from":1471.66,"to":1474.27,"location":2,"content":"a reasonably sized mini batch it'll turn"},{"from":1474.27,"to":1477.21,"location":2,"content":"out that those mini batches the mini"},{"from":1477.21,"to":1479.88,"location":2,"content":"batch only has you know relatively"},{"from":1479.88,"to":1481.89,"location":2,"content":"speaking a handful of words in it right"},{"from":1481.89,"to":1483.66,"location":2,"content":"so if you have a mini batch of size 32"},{"from":1483.66,"to":1486.48,"location":2,"content":"in a window size of 10 you know probably"},{"from":1486.48,"to":1488.97,"location":2,"content":"there are only about 100 hundred 50"},{"from":1488.97,"to":1491.31,"location":2,"content":"different words in it but yet 
we're"},{"from":1491.31,"to":1493.8,"location":2,"content":"building this model over a vocabulary of"},{"from":1493.8,"to":1495.51,"location":2,"content":"quarter of a million words or something"},{"from":1495.51,"to":1497.64,"location":2,"content":"like that so just about all of the"},{"from":1497.64,"to":1502.91,"location":2,"content":"elements in this vector are zero and so"},{"from":1502.91,"to":1506.84,"location":2,"content":"we sort of really have this very sparse"},{"from":1506.84,"to":1511.44,"location":2,"content":"parameter update and so that sort of"},{"from":1511.44,"to":1514.65,"location":2,"content":"suggests that we actually probably want"},{"from":1514.65,"to":1517.17,"location":2,"content":"to sort of only update the word vectors"},{"from":1517.17,"to":1519.84,"location":2,"content":"that appear and then the question is"},{"from":1519.84,"to":1521.46,"location":2,"content":"whether you can achieve that right the"},{"from":1521.46,"to":1522.99,"location":2,"content":"dumb way to do it is you just have this"},{"from":1522.99,"to":1526.11,"location":2,"content":"matrix that's normally nearly all zeros"},{"from":1526.11,"to":1529.08,"location":2,"content":"and you say add those two matrices"},{"from":1529.08,"to":1532.23,"location":2,"content":"together and there you go and then the"},{"from":1532.23,"to":1534.66,"location":2,"content":"question is can you actually have a"},{"from":1534.66,"to":1537.66,"location":2,"content":"sparse matrix update which only updates"},{"from":1537.66,"to":1540.57,"location":2,"content":"the certain rows of the matrix that"},{"from":1540.57,"to":1542.25,"location":2,"content":"contain the words that you've entered"},{"from":1542.25,"to":1545.37,"location":2,"content":"and do things much faster and if you're"},{"from":1545.37,"to":1547.23,"location":2,"content":"doing something even cleverer like doing"},{"from":1547.23,"to":1549.74,"location":2,"content":"distributed computation over 
multiple"},{"from":1549.74,"to":1552.09,"location":2,"content":"computers and sharing your parameters"},{"from":1552.09,"to":1553.38,"location":2,"content":"well then definitely you just sort of"},{"from":1553.38,"to":1555.45,"location":2,"content":"only want to update the word vectors"},{"from":1555.45,"to":1557.1,"location":2,"content":"that you've actually been getting a"},{"from":1557.1,"to":1559.23,"location":2,"content":"parameter estimate for so there's sort"},{"from":1559.23,"to":1560.61,"location":2,"content":"of some details there"},{"from":1560.61,"to":1562.26,"location":2,"content":"but I'm gonna skip past them more"},{"from":1562.26,"to":1566.4,"location":2,"content":"details right so a couple of people"},{"from":1566.4,"to":1569.25,"location":2,"content":"asked afterwards yeah why are there"},{"from":1569.25,"to":1571.8,"location":2,"content":"these two word vectors they're sort of"},{"from":1571.8,"to":1575.16,"location":2,"content":"Center and the outside one and I mean"},{"from":1575.16,"to":1577.68,"location":2,"content":"the answer to that is it makes that mat"},{"from":1577.68,"to":1581.91,"location":2,"content":"I showed you easy right so that if if"},{"from":1581.91,"to":1585.75,"location":2,"content":"you do it as I showed you well you know"},{"from":1585.75,"to":1589.74,"location":2,"content":"for working out the partial derivatives"},{"from":1589.74,"to":1592.98,"location":2,"content":"for the center word it's just as I"},{"from":1592.98,"to":1597.15,"location":2,"content":"showed you it's easy but if you use only"},{"from":1597.15,"to":1601.05,"location":2,"content":"one set of word vectors well then the"},{"from":1601.05,"to":1603.42,"location":2,"content":"same word that's the center of word will"},{"from":1603.42,"to":1606.18,"location":2,"content":"be one of the choices for the context"},{"from":1606.18,"to":1607.65,"location":2,"content":"word when you're working out that"},{"from":1607.65,"to":1610.26,"location":2,"content":"softmax 
for the context word and then"},{"from":1610.26,"to":1612.48,"location":2,"content":"you'll get these terms that are then"},{"from":1612.48,"to":1615.12,"location":2,"content":"squared terms in terms of the two"},{"from":1615.12,"to":1618.15,"location":2,"content":"references of that same word and that"},{"from":1618.15,"to":1621.63,"location":2,"content":"makes your math more difficult so it's"},{"from":1621.63,"to":1624.6,"location":2,"content":"sort of just a practical thing in the"},{"from":1624.6,"to":1627.12,"location":2,"content":"end I mean it sort of doesn't make very"},{"from":1627.12,"to":1629.46,"location":2,"content":"much difference because if you sort of"},{"from":1629.46,"to":1630.96,"location":2,"content":"think about it since you're going along"},{"from":1630.96,"to":1633.81,"location":2,"content":"through all the positions you know what"},{"from":1633.81,"to":1635.82,"location":2,"content":"was a Center word at one point is"},{"from":1635.82,"to":1637.89,"location":2,"content":"immediately afterwards the context word"},{"from":1637.89,"to":1640.71,"location":2,"content":"of what used to be a context word which"},{"from":1640.71,"to":1642.42,"location":2,"content":"is now the center words they sort of"},{"from":1642.42,"to":1646.02,"location":2,"content":"doing the same computations because you"},{"from":1646.02,"to":1647.58,"location":2,"content":"know the dot product is symmetric"},{"from":1647.58,"to":1650.64,"location":2,"content":"actually all over again"},{"from":1650.64,"to":1652.98,"location":2,"content":"so you've they get pretty similar vector"},{"from":1652.98,"to":1655.2,"location":2,"content":"representations so it seems like in"},{"from":1655.2,"to":1656.61,"location":2,"content":"general you can get the best results by"},{"from":1656.61,"to":1658.56,"location":2,"content":"averaging what comes out for your two"},{"from":1658.56,"to":1660.36,"location":2,"content":"vectors and you end up with just 
one"},{"from":1660.36,"to":1664.73,"location":2,"content":"vector per word okay more substantively"},{"from":1664.73,"to":1668.79,"location":2,"content":"if you go to the word2vec paper you"},{"from":1668.79,"to":1670.26,"location":2,"content":"will discover that they're sort of more"},{"from":1670.26,"to":1672.66,"location":2,"content":"to word2vec that they define the"},{"from":1672.66,"to":1675.09,"location":2,"content":"sort of a family of word2vec models"},{"from":1675.09,"to":1677.67,"location":2,"content":"and there are so two main parts of that"},{"from":1677.67,"to":1680.82,"location":2,"content":"family firstly there's a choice between"},{"from":1680.82,"to":1683.43,"location":2,"content":"the continuous bag of words model and"},{"from":1683.43,"to":1685.8,"location":2,"content":"the skip-grams model and what I"},{"from":1685.8,"to":1688.08,"location":2,"content":"presented was the skip-grams model so"},{"from":1688.08,"to":1689.94,"location":2,"content":"in the skip-grams model you've got"},{"from":1689.94,"to":1692.13,"location":2,"content":"one Center word and you're trying to"},{"from":1692.13,"to":1693.78,"location":2,"content":"predict all the words in"},{"from":1693.78,"to":1696.9,"location":2,"content":"context one at a time for the continuous"},{"from":1696.9,"to":1698.94,"location":2,"content":"bag of words model it's the opposite"},{"from":1698.94,"to":1701.67,"location":2,"content":"you've got all of the outside words and"},{"from":1701.67,"to":1704.25,"location":2,"content":"you're trying to use all of them though"},{"from":1704.25,"to":1706.29,"location":2,"content":"considered independently like a naive"},{"from":1706.29,"to":1709.97,"location":2,"content":"Bayes model to predict the center word"},{"from":1709.97,"to":1715.02,"location":2,"content":"and then the second one is the way I"},{"from":1715.02,"to":1718.29,"location":2,"content":"presented learning this was the 
method"},{"from":1718.29,"to":1719.64,"location":2,"content":"that's using the so called"},{"from":1719.64,"to":1722.52,"location":2,"content":"naive softmax so therefore when we were"},{"from":1722.52,"to":1725.04,"location":2,"content":"wanting to work things out we were sort"},{"from":1725.04,"to":1727.17,"location":2,"content":"of saying okay we want probability"},{"from":1727.17,"to":1729.78,"location":2,"content":"estimates for the context words and so"},{"from":1729.78,"to":1731.25,"location":2,"content":"we're just going to sum over the whole"},{"from":1731.25,"to":1733.89,"location":2,"content":"vocabulary and we'll come up with these"},{"from":1733.89,"to":1737.7,"location":2,"content":"probability estimates in practice that"},{"from":1737.7,"to":1740.25,"location":2,"content":"turns out to be a sort of a bad idea"},{"from":1740.25,"to":1742.98,"location":2,"content":"because that would also make things mega"},{"from":1742.98,"to":1746.79,"location":2,"content":"slow so in homework 2 coming up next"},{"from":1746.79,"to":1749.85,"location":2,"content":"week you will get to implement a much"},{"from":1749.85,"to":1752.85,"location":2,"content":"more practical way of doing this which"},{"from":1752.85,"to":1755.07,"location":2,"content":"they present in the word to vectors"},{"from":1755.07,"to":1757.17,"location":2,"content":"right so the problem is if we're using"},{"from":1757.17,"to":1759.96,"location":2,"content":"this equation that we used to do the"},{"from":1759.96,"to":1762.45,"location":2,"content":"calculus that down in this denominator"},{"from":1762.45,"to":1765.51,"location":2,"content":"here we're doing the sum over the entire"},{"from":1765.51,"to":1767.43,"location":2,"content":"vocabulary so if you have a vocabulary"},{"from":1767.43,"to":1769.62,"location":2,"content":"quarter million words we're sort of"},{"from":1769.62,"to":1771,"location":2,"content":"doing a quarter of a million 
dot"},{"from":1771,"to":1773.34,"location":2,"content":"products and exponentials and adding"},{"from":1773.34,"to":1774.93,"location":2,"content":"them all to work out that"},{"from":1774.93,"to":1777.93,"location":2,"content":"denominator and that sort of seems a"},{"from":1777.93,"to":1779.97,"location":2,"content":"sort of a really bad idea if you want"},{"from":1779.97,"to":1781.14,"location":2,"content":"things to be fast"},{"from":1781.14,"to":1785.82,"location":2,"content":"so Tomas Mikolov and colleagues came up"},{"from":1785.82,"to":1787.74,"location":2,"content":"with this idea of negative sampling"},{"from":1787.74,"to":1789.72,"location":2,"content":"would be near enough and so the idea of"},{"from":1789.72,"to":1791.82,"location":2,"content":"negative sampling is we're going to"},{"from":1791.82,"to":1794.76,"location":2,"content":"train binary logistic regressions"},{"from":1794.76,"to":1797.49,"location":2,"content":"instead and so we're going to train one"},{"from":1797.49,"to":1800.34,"location":2,"content":"binary logistic regression for the"},{"from":1800.34,"to":1802.71,"location":2,"content":"actual word observed what's in the"},{"from":1802.71,"to":1805.26,"location":2,"content":"numerator and you want to give high"},{"from":1805.26,"to":1807.45,"location":2,"content":"probability to the word that was"},{"from":1807.45,"to":1810.93,"location":2,"content":"actually observed and then what we're"},{"from":1810.93,"to":1812.79,"location":2,"content":"going to do is we're going to sort of"},{"from":1812.79,"to":1815.37,"location":2,"content":"randomly sample a bunch of other words"},{"from":1815.37,"to":1818.1,"location":2,"content":"they're the negative samples and say"},{"from":1818.1,"to":1820.71,"location":2,"content":"they weren't the ones that were actually"},{"from":1820.71,"to":1823.29,"location":2,"content":"seen so you should be trying to give"},{"from":1823.29,"to":1826.97,"location":2,"content":"them as low a probability as 
possible"},{"from":1826.97,"to":1831.05,"location":2,"content":"okay so the sort of notation that they"},{"from":1831.05,"to":1833.3,"location":2,"content":"use in the paper is so slightly"},{"from":1833.3,"to":1835.52,"location":2,"content":"different to the one I've used and they"},{"from":1835.52,"to":1837.08,"location":2,"content":"actually do maximization not"},{"from":1837.08,"to":1840.23,"location":2,"content":"minimization and that's their equation"},{"from":1840.23,"to":1844.64,"location":2,"content":"which I'll come back to there before we"},{"from":1844.64,"to":1846.86,"location":2,"content":"do that here's the sigmoid function so"},{"from":1846.86,"to":1848.78,"location":2,"content":"the sigmoid functions normally written"},{"from":1848.78,"to":1851.51,"location":2,"content":"like this 1 over 1 plus e to the minus X"},{"from":1851.51,"to":1855.74,"location":2,"content":"but essentially the sigmoid function is"},{"from":1855.74,"to":1858.38,"location":2,"content":"like a binary case of the softmax"},{"from":1858.38,"to":1860.87,"location":2,"content":"function right that we have two possible"},{"from":1860.87,"to":1863.93,"location":2,"content":"outcomes yes and no and that you're sort"},{"from":1863.93,"to":1866.84,"location":2,"content":"of again got an import that is any real"},{"from":1866.84,"to":1869.3,"location":2,"content":"number and it's mapping it onto a"},{"from":1869.3,"to":1872.03,"location":2,"content":"probability distribution between 0 and 1"},{"from":1872.03,"to":1873.98,"location":2,"content":"which represents these two binary"},{"from":1873.98,"to":1876.05,"location":2,"content":"outcomes and the extent that the numbers"},{"from":1876.05,"to":1878.63,"location":2,"content":"positive it kind of ceilings 2 1 and"},{"from":1878.63,"to":1882.38,"location":2,"content":"negative goes down to 0 ok so with this"},{"from":1882.38,"to":1885.02,"location":2,"content":"time we're going to take the dot 
product for"},{"from":1885.02,"to":1887.09,"location":2,"content":"the good word we're going to take the"},{"from":1887.09,"to":1889.88,"location":2,"content":"dot product of the two vectors shove it"},{"from":1889.88,"to":1892.19,"location":2,"content":"through a sigmoid function and then"},{"from":1892.19,"to":1893.42,"location":2,"content":"we're going to want that probability"},{"from":1893.42,"to":1897.5,"location":2,"content":"estimate to be as high as possible so if"},{"from":1897.5,"to":1899.33,"location":2,"content":"I show you this version which is just"},{"from":1899.33,"to":1902.3,"location":2,"content":"written slightly differently to look as"},{"from":1902.3,"to":1904.91,"location":2,"content":"much as possible like the notation that"},{"from":1904.91,"to":1906.65,"location":2,"content":"we used last time"},{"from":1906.65,"to":1908.99,"location":2,"content":"here is our new objective function for"},{"from":1908.99,"to":1910.97,"location":2,"content":"using negative sampling and we've"},{"from":1910.97,"to":1915.17,"location":2,"content":"got two terms the first one is the log"},{"from":1915.17,"to":1918.68,"location":2,"content":"of the sigmoid of the observed context"},{"from":1918.68,"to":1921.08,"location":2,"content":"word the outside words dot producted"},{"from":1921.08,"to":1923.33,"location":2,"content":"with the center word and we're going to"},{"from":1923.33,"to":1927.02,"location":2,"content":"want that to be big and then on the"},{"from":1927.02,"to":1934.51,"location":2,"content":"other hand we've got the randomly chosen"},{"from":1934.51,"to":1938.6,"location":2,"content":"K words which are just other words and"},{"from":1938.6,"to":1939.98,"location":2,"content":"we're going to work out dot products"},{"from":1939.98,"to":1941.87,"location":2,"content":"between them and the Center word and"},{"from":1941.87,"to":1943.91,"location":2,"content":"we're going to want those to be as 
small"},{"from":1943.91,"to":1946.58,"location":2,"content":"as possible like that extra minus sign"},{"from":1946.58,"to":1948.98,"location":2,"content":"in there which is causing the sign of"},{"from":1948.98,"to":1951.44,"location":2,"content":"the two things to be different right"},{"from":1951.44,"to":1954.95,"location":2,"content":"today's our negative samples and for big"},{"from":1954.95,"to":1957.29,"location":2,"content":"K it can be reasonably modest number you"},{"from":1957.29,"to":1959.87,"location":2,"content":"can just take kind of 1015 negative"},{"from":1959.87,"to":1960.8,"location":2,"content":"samples"},{"from":1960.8,"to":1964.28,"location":2,"content":"and that works pretty fine I said we"},{"from":1964.28,"to":1967.34,"location":2,"content":"sort of sampled some words to be the"},{"from":1967.34,"to":1969.73,"location":2,"content":"negative samples they in particular"},{"from":1969.73,"to":1973.34,"location":2,"content":"proposed a sampling distribution that"},{"from":1973.34,"to":1976.25,"location":2,"content":"helps them along a little in partly"},{"from":1976.25,"to":1978.47,"location":2,"content":"dealing with this problem of very"},{"from":1978.47,"to":1982.55,"location":2,"content":"frequent words so the starting point of"},{"from":1982.55,"to":1984.77,"location":2,"content":"how you sample words is you use what we"},{"from":1984.77,"to":1987.74,"location":2,"content":"call the a unigram distribution so that"},{"from":1987.74,"to":1989.63,"location":2,"content":"just means you take words in a large"},{"from":1989.63,"to":1992.69,"location":2,"content":"corpus and count up how often each one"},{"from":1992.69,"to":1995.42,"location":2,"content":"occurs just as a count of independent"},{"from":1995.42,"to":1997.25,"location":2,"content":"word so there's the current unigram"},{"from":1997.25,"to":1999.65,"location":2,"content":"counts and so you start off with unigram"},{"from":1999.65,"to":2001.9,"location":2,"content":"counts but then 
you raise them to the"},{"from":2001.9,"to":2005.08,"location":2,"content":"3/4 power and raising to the 3/4 power"},{"from":2005.08,"to":2008.74,"location":2,"content":"has the effect of decreasing how often"},{"from":2008.74,"to":2010.74,"location":2,"content":"you sample very common words and"},{"from":2010.74,"to":2013.39,"location":2,"content":"increasing how often you sample rarer"},{"from":2013.39,"to":2020.08,"location":2,"content":"words ok and that's that okay so that's"},{"from":2020.08,"to":2023.02,"location":2,"content":"everything about word to Vic I'm going"},{"from":2023.02,"to":2034.57,"location":2,"content":"to say anyone have any lasting yes sorry"},{"from":2034.57,"to":2037.03,"location":2,"content":"see that capital Z is often used as a"},{"from":2037.03,"to":2040.51,"location":2,"content":"normalization term and so this is saying"},{"from":2040.51,"to":2042.22,"location":2,"content":"well if you want the probability"},{"from":2042.22,"to":2044.53,"location":2,"content":"distribution of words is you work out"},{"from":2044.53,"to":2047.32,"location":2,"content":"this 3/4 power of the count of the word"},{"from":2047.32,"to":2049.36,"location":2,"content":"for every word in the vocabulary and"},{"from":2049.36,"to":2051.94,"location":2,"content":"then these numbers you just sum them up"},{"from":2051.94,"to":2053.74,"location":2,"content":"over the vocabulary and it'll be sum"},{"from":2053.74,"to":2056.11,"location":2,"content":"total and we're dividing by that so we"},{"from":2056.11,"to":2057.88,"location":2,"content":"get a probability distribution good"},{"from":2057.88,"to":2059.47,"location":2,"content":"question because I hadn't explained that"},{"from":2059.47,"to":2061.96,"location":2,"content":"in this class when you see the letter Z"},{"from":2061.96,"to":2064.9,"location":2,"content":"with no explanation it normally means I"},{"from":2064.9,"to":2068.47,"location":2,"content":"am a normalization term to turn 
things"},{"from":2068.47,"to":2070.54,"location":2,"content":"into probabilities and you sort of"},{"from":2070.54,"to":2072.37,"location":2,"content":"iterate over the numerator term and"},{"from":2072.37,"to":2075.37,"location":2,"content":"summing them and divide through any"},{"from":2075.37,"to":2076.96,"location":2,"content":"other questions of things I haven't"},{"from":2076.96,"to":2087.07,"location":2,"content":"explained or otherwise yes yes so it's"},{"from":2087.07,"to":2089.02,"location":2,"content":"nice window do you so I'll actually come"},{"from":2089.02,"to":2091.06,"location":2,"content":"back to that in a bit and show a little"},{"from":2091.06,"to":2093.13,"location":2,"content":"bit of data on that but yeah we haven't"},{"from":2093.13,"to":2094.21,"location":2,"content":"done anything about"},{"from":2094.21,"to":2095.56,"location":2,"content":"that at the moment we're guessing a"},{"from":2095.56,"to":2097.84,"location":2,"content":"window size like five which isn't a bad"},{"from":2097.84,"to":2098.8,"location":2,"content":"one"},{"from":2098.8,"to":2100.99,"location":2,"content":"but you know there isn't there hasn't"},{"from":2100.99,"to":2104.53,"location":2,"content":"really been any science behind that that"},{"from":2104.53,"to":2106.3,"location":2,"content":"people treat that as what's then called"},{"from":2106.3,"to":2109.69,"location":2,"content":"a hyper parameter which means that you"},{"from":2109.69,"to":2111.88,"location":2,"content":"try a few different numbers and see"},{"from":2111.88,"to":2114.13,"location":2,"content":"which one seems best and that's the one"},{"from":2114.13,"to":2118.26,"location":2,"content":"that you use in your future work yeah"},{"from":2118.26,"to":2125.49,"location":2,"content":"our chosen for any theoretical reason no"},{"from":2125.49,"to":2130.24,"location":2,"content":"that that was also chosen as a hyper"},{"from":2130.24,"to":2133.06,"location":2,"content":"parameter that improved 
performance I"},{"from":2133.06,"to":2136.78,"location":2,"content":"mean actually you know for this word to"},{"from":2136.78,"to":2140.35,"location":2,"content":"vector I mean you know it turns out that"},{"from":2140.35,"to":2145.21,"location":2,"content":"in the actual paper the model looks very"},{"from":2145.21,"to":2148.51,"location":2,"content":"fairly clean but what people discovered"},{"from":2148.51,"to":2150.73,"location":2,"content":"when they started digging through the"},{"from":2150.73,"to":2153.46,"location":2,"content":"code which to to their credit they did"},{"from":2153.46,"to":2155.62,"location":2,"content":"make available reproducible research"},{"from":2155.62,"to":2158.44,"location":2,"content":"that they're actually a whole bunch of"},{"from":2158.44,"to":2161.26,"location":2,"content":"tricks of different things like these"},{"from":2161.26,"to":2164.59,"location":2,"content":"hyper parameters of how you sample and"},{"from":2164.59,"to":2166.9,"location":2,"content":"how you wait windows and various things"},{"from":2166.9,"to":2169.12,"location":2,"content":"to make the numbers better so you know"},{"from":2169.12,"to":2171.13,"location":2,"content":"people play quite a few tricks to make"},{"from":2171.13,"to":2172.72,"location":2,"content":"the numbers go up which aren't"},{"from":2172.72,"to":2201.06,"location":2,"content":"particularly theoretical good sometimes"},{"from":2201.06,"to":2207.28,"location":2,"content":"I so I you so in general for a lot of"},{"from":2207.28,"to":2209.31,"location":2,"content":"these sampling things that's a bad idea"},{"from":2209.31,"to":2211.96,"location":2,"content":"you're going to be doing multiple passes"},{"from":2211.96,"to":2214.18,"location":2,"content":"if you just go boom boom full and then"},{"from":2214.18,"to":2215.95,"location":2,"content":"bloom bloom bloom again that's a bad"},{"from":2215.95,"to":2218.47,"location":2,"content":"idea but a common technique a lot of 
the"},{"from":2218.47,"to":2220.81,"location":2,"content":"packages use is that they do use a"},{"from":2220.81,"to":2223.06,"location":2,"content":"shuffling operation at the beginning so"},{"from":2223.06,"to":2225.22,"location":2,"content":"for each epoch they'll shuffle the data"},{"from":2225.22,"to":2227.38,"location":2,"content":"randomly and then they'll go through it"},{"from":2227.38,"to":2227.65,"location":2,"content":"and"},{"from":2227.65,"to":2229.78,"location":2,"content":"sequence and that has the benefits of"},{"from":2229.78,"to":2233.47,"location":2,"content":"faster computation from locality etc"},{"from":2233.47,"to":2235.84,"location":2,"content":"while meaning that when you do a"},{"from":2235.84,"to":2237.31,"location":2,"content":"differently pocket will work out"},{"from":2237.31,"to":2249.49,"location":2,"content":"differently yeah that last question I"},{"from":2249.49,"to":2251.55,"location":2,"content":"think was talking about taking the"},{"from":2251.55,"to":2254.04,"location":2,"content":"mini-batches from the corpus and"},{"from":2254.04,"to":2256.21,"location":2,"content":"contrasting whether you actually say"},{"from":2256.21,"to":2258.55,"location":2,"content":"sample 20 randomly from the whole corpus"},{"from":2258.55,"to":2260.47,"location":2,"content":"versus just sort of working from left to"},{"from":2260.47,"to":2278.59,"location":2,"content":"right you have a question you could"},{"from":2278.59,"to":2280.84,"location":2,"content":"argue whether or not this was written in"},{"from":2280.84,"to":2283.24,"location":2,"content":"the clearest way but right so we're"},{"from":2283.24,"to":2286,"location":2,"content":"making this dot product and then when"},{"from":2286,"to":2288.57,"location":2,"content":"the gating eart which is then flipping"},{"from":2288.57,"to":2291.34,"location":2,"content":"which side of the space we're on right"},{"from":2291.34,"to":2294.94,"location":2,"content":"because the sigmoid is symmetric 
around"},{"from":2294.94,"to":2298.2,"location":2,"content":"zero so if we've got some dot product"},{"from":2298.2,"to":2301.39,"location":2,"content":"and then we negate it we're sort of"},{"from":2301.39,"to":2304.84,"location":2,"content":"working out a 1 minus probability and so"},{"from":2304.84,"to":2307.96,"location":2,"content":"that's the way in which we're actually"},{"from":2307.96,"to":2311.38,"location":2,"content":"for the first term for the first term"},{"from":2311.38,"to":2313.15,"location":2,"content":"we're wanting the probability to be high"},{"from":2313.15,"to":2315.76,"location":2,"content":"and then for the negative samples we're"},{"from":2315.76,"to":2318.81,"location":2,"content":"wanting their probabilities you be low"},{"from":2318.81,"to":2324.85,"location":2,"content":"okay oh maybe run ahead now so this was"},{"from":2324.85,"to":2329.17,"location":2,"content":"an algorithm which sort of you're going"},{"from":2329.17,"to":2331.39,"location":2,"content":"through this corpus position by position"},{"from":2331.39,"to":2334.81,"location":2,"content":"and you're sort of doing this prediction"},{"from":2334.81,"to":2337.36,"location":2,"content":"of words and then you're updating some"},{"from":2337.36,"to":2338.98,"location":2,"content":"parameters and you're learning something"},{"from":2338.98,"to":2341.32,"location":2,"content":"and you know by Jove it seemed to work"},{"from":2341.32,"to":2344.53,"location":2,"content":"based on what we saw in the examples but"},{"from":2344.53,"to":2346.69,"location":2,"content":"you know you might have thought that"},{"from":2346.69,"to":2349.36,"location":2,"content":"that was kind of weird right look we"},{"from":2349.36,"to":2351.79,"location":2,"content":"have this whole big pile of data you"},{"from":2351.79,"to":2355.27,"location":2,"content":"know sort of traditional I thinking of"},{"from":2355.27,"to":2357.46,"location":2,"content":"Statistics right if you have a big 
pile"},{"from":2357.46,"to":2360.19,"location":2,"content":"of data you a granade it and it sort of"},{"from":2360.19,"to":2361.51,"location":2,"content":"seems like there obviously"},{"from":2361.51,"to":2363.19,"location":2,"content":"you could do here you could say well"},{"from":2363.19,"to":2365.77,"location":2,"content":"there's a word like whatever word we're"},{"from":2365.77,"to":2368.23,"location":2,"content":"using banana let's just see what words"},{"from":2368.23,"to":2370.72,"location":2,"content":"occur in the context of a gut banana and"},{"from":2370.72,"to":2372.67,"location":2,"content":"count them all up and then we'll be able"},{"from":2372.67,"to":2375.22,"location":2,"content":"to use those to predict somehow and you"},{"from":2375.22,"to":2377.58,"location":2,"content":"know those kinds of methods were"},{"from":2377.58,"to":2380.95,"location":2,"content":"traditionally used including even with"},{"from":2380.95,"to":2383.8,"location":2,"content":"distributed representation techniques so"},{"from":2383.8,"to":2385.51,"location":2,"content":"I want to say a bit about that so you're"},{"from":2385.51,"to":2388.21,"location":2,"content":"fully educated and don't sound like one"},{"from":2388.21,"to":2390.85,"location":2,"content":"of those people who were aware of no"},{"from":2390.85,"to":2394.03,"location":2,"content":"work that happened before 2013 when your"},{"from":2394.03,"to":2397.63,"location":2,"content":"networks took off okay so what we could"},{"from":2397.63,"to":2400.12,"location":2,"content":"do is we can essentially do the same"},{"from":2400.12,"to":2403.6,"location":2,"content":"thing as sort of word to Veck we could"},{"from":2403.6,"to":2406.93,"location":2,"content":"say there's a five word window around"},{"from":2406.93,"to":2409.21,"location":2,"content":"each word instance that's often referred"},{"from":2409.21,"to":2411.91,"location":2,"content":"to as a word token right so in NLP 
we"},{"from":2411.91,"to":2413.68,"location":2,"content":"often want to distinguish between a"},{"from":2413.68,"to":2417.63,"location":2,"content":"particular kind of type like banana or"},{"from":2417.63,"to":2420.64,"location":2,"content":"Apple versus particular instances of an"},{"from":2420.64,"to":2422.35,"location":2,"content":"in the text and that's referred to sort"},{"from":2422.35,"to":2425.58,"location":2,"content":"of a type token distinction so we could"},{"from":2425.58,"to":2429.82,"location":2,"content":"look at each token of a word and the"},{"from":2429.82,"to":2431.68,"location":2,"content":"words five around that and then we"},{"from":2431.68,"to":2433.93,"location":2,"content":"should so start counting up which words"},{"from":2433.93,"to":2436.84,"location":2,"content":"occur occur with it and so we can then"},{"from":2436.84,"to":2441.84,"location":2,"content":"have a matrix of co-occurrence counts"},{"from":2441.84,"to":2445,"location":2,"content":"okay so we'll have again and I'm going"},{"from":2445,"to":2446.62,"location":2,"content":"to give an example of this so normally"},{"from":2446.62,"to":2448.87,"location":2,"content":"again use a five to ten but you know I"},{"from":2448.87,"to":2451.03,"location":2,"content":"can just use a window of one to keep my"},{"from":2451.03,"to":2453.61,"location":2,"content":"counts very simple and small I ignore"},{"from":2453.61,"to":2455.98,"location":2,"content":"left or right just like word defected"},{"from":2455.98,"to":2458.47,"location":2,"content":"and so if I have a teeny baby corpus"},{"from":2458.47,"to":2460.93,"location":2,"content":"like this you know what I could do is"},{"from":2460.93,"to":2463.42,"location":2,"content":"just say here's a matrix of word"},{"from":2463.42,"to":2466.69,"location":2,"content":"co-occurrence accounts so within my"},{"from":2466.69,"to":2469.54,"location":2,"content":"window size of one eye occurs next 
to"},{"from":2469.54,"to":2471.88,"location":2,"content":"like twice and that means that like"},{"from":2471.88,"to":2474.7,"location":2,"content":"occurs next why twice it's symmetric and"},{"from":2474.7,"to":2476.8,"location":2,"content":"all my other counts here are single"},{"from":2476.8,"to":2481.44,"location":2,"content":"turns and so this gives me a big huge"},{"from":2481.44,"to":2484.66,"location":2,"content":"sparse matrix of word co-occurrence"},{"from":2484.66,"to":2486.88,"location":2,"content":"accounts and so one thing that you could"},{"from":2486.88,"to":2489.31,"location":2,"content":"do is just use this matrix directly"},{"from":2489.31,"to":2492.04,"location":2,"content":"because I haven't really got enough data"},{"from":2492.04,"to":2495.6,"location":2,"content":"here but you know if you sort of"},{"from":2495.6,"to":2499.18,"location":2,"content":"decided that you know the word like is"},{"from":2499.18,"to":2501.76,"location":2,"content":"like the word learning what you do is"},{"from":2501.76,"to":2503.77,"location":2,"content":"you'd expect that these two vectors"},{"from":2503.77,"to":2506.17,"location":2,"content":"would end up kind of similar to each"},{"from":2506.17,"to":2509.85,"location":2,"content":"other they do so you could just measure"},{"from":2509.85,"to":2513.07,"location":2,"content":"similarity of the vectors directly in"},{"from":2513.07,"to":2515.8,"location":2,"content":"terms of these co-occurrence counts but"},{"from":2515.8,"to":2517.96,"location":2,"content":"you know it's a little bit unappealing"},{"from":2517.96,"to":2520.36,"location":2,"content":"doing things this way right if you have"},{"from":2520.36,"to":2522.73,"location":2,"content":"a quarter million word vocabulary that's"},{"from":2522.73,"to":2525.85,"location":2,"content":"where you're in this space where my math"},{"from":2525.85,"to":2527.83,"location":2,"content":"is bad but it's in the trillions of 
the"},{"from":2527.83,"to":2530.17,"location":2,"content":"number of cells of this matrix might"},{"from":2530.17,"to":2532.21,"location":2,"content":"require a lot of storage though if"},{"from":2532.21,"to":2533.65,"location":2,"content":"you're clever and notice that most of"},{"from":2533.65,"to":2535.66,"location":2,"content":"the cells were zero and could do some"},{"from":2535.66,"to":2538.27,"location":2,"content":"clever sparse matrix representation"},{"from":2538.27,"to":2540.46,"location":2,"content":"might take a little bit less your"},{"from":2540.46,"to":2542.47,"location":2,"content":"classification models might have sparse"},{"from":2542.47,"to":2543.97,"location":2,"content":"the issues because you know a lot of"},{"from":2543.97,"to":2545.92,"location":2,"content":"those cells aren't present and so it"},{"from":2545.92,"to":2548.08,"location":2,"content":"might not be very robust and so there"},{"from":2548.08,"to":2549.97,"location":2,"content":"was a traditional answer to all of these"},{"from":2549.97,"to":2552.97,"location":2,"content":"things which is well maybe we can have"},{"from":2552.97,"to":2556.03,"location":2,"content":"that big co-occurrence count matrix and"},{"from":2556.03,"to":2559.24,"location":2,"content":"somehow reduce its dimensionality I've"},{"from":2559.24,"to":2563.08,"location":2,"content":"just find a corresponding low"},{"from":2563.08,"to":2566.23,"location":2,"content":"dimensional matrix which preserves most"},{"from":2566.23,"to":2569.14,"location":2,"content":"of the information in the original"},{"from":2569.14,"to":2571.87,"location":2,"content":"matrix and you know maybe all reduce"},{"from":2571.87,"to":2574.36,"location":2,"content":"things to a dimensionality of somewhere"},{"from":2574.36,"to":2577.27,"location":2,"content":"around the size 25 to a thousand has"},{"from":2577.27,"to":2580.09,"location":2,"content":"done with word Davich so there's sort 
of"},{"from":2580.09,"to":2582.7,"location":2,"content":"a standard most common way of doing this"},{"from":2582.7,"to":2585.7,"location":2,"content":"dimensionality reduction and you don't"},{"from":2585.7,"to":2587.11,"location":2,"content":"really have to understand all the math"},{"from":2587.11,"to":2588.49,"location":2,"content":"but you get to play with this in"},{"from":2588.49,"to":2592.36,"location":2,"content":"homework 1 which is for any matrix you"},{"from":2592.36,"to":2594.1,"location":2,"content":"can do what's called the singular value"},{"from":2594.1,"to":2596.8,"location":2,"content":"decomposition which is a way you can"},{"from":2596.8,"to":2599.59,"location":2,"content":"take an arbitrary matrix and decompose"},{"from":2599.59,"to":2604.63,"location":2,"content":"it into 3 matrices where the center one"},{"from":2604.63,"to":2607.27,"location":2,"content":"is diagonal and has what in it what are"},{"from":2607.27,"to":2609.34,"location":2,"content":"called singular vectors which are"},{"from":2609.34,"to":2611.14,"location":2,"content":"weightings of the different dimensions"},{"from":2611.14,"to":2613.84,"location":2,"content":"so they decrease in size as you go"},{"from":2613.84,"to":2617.05,"location":2,"content":"downwards and then these two U and V and"},{"from":2617.05,"to":2620.32,"location":2,"content":"then orthogonal basis corresponding to"},{"from":2620.32,"to":2622.57,"location":2,"content":"the rows and columns and so in"},{"from":2622.57,"to":2625.03,"location":2,"content":"particular it's even simpler in the case"},{"from":2625.03,"to":2626.65,"location":2,"content":"where we just have these word word"},{"from":2626.65,"to":2628.99,"location":2,"content":"vectors because you have a square matrix"},{"from":2628.99,"to":2631.36,"location":2,"content":"and so they're effectively the same but"},{"from":2631.36,"to":2633.73,"location":2,"content":"you know for the general case 
although"},{"from":2633.73,"to":2636.37,"location":2,"content":"you get these sort of full orthogonal"},{"from":2636.37,"to":2639.46,"location":2,"content":"basis you then have these bits sort of"},{"from":2639.46,"to":2641.08,"location":2,"content":"don't really matter cuz they end up"},{"from":2641.08,"to":2642.7,"location":2,"content":"being used for nothing when you work out"},{"from":2642.7,"to":2645.88,"location":2,"content":"the product and then if you want to"},{"from":2645.88,"to":2648.64,"location":2,"content":"reduce the dimensionality what you say"},{"from":2648.64,"to":2651.31,"location":2,"content":"is throw away the smallest singular"},{"from":2651.31,"to":2654.04,"location":2,"content":"values which remember they're in"},{"from":2654.04,"to":2656.65,"location":2,"content":"decreasing size and that means you're"},{"from":2656.65,"to":2659.26,"location":2,"content":"then effectively throwing away rows and"},{"from":2659.26,"to":2662.47,"location":2,"content":"columns of these other matrices and then"},{"from":2662.47,"to":2665.05,"location":2,"content":"it says behold I've now reduced these"},{"from":2665.05,"to":2666.27,"location":2,"content":"things to a two dimensional"},{"from":2666.27,"to":2669.13,"location":2,"content":"representation from the original three"},{"from":2669.13,"to":2671.17,"location":2,"content":"dimensional representation and that's"},{"from":2671.17,"to":2674.59,"location":2,"content":"referred to as the reduced SVD and the"},{"from":2674.59,"to":2677.44,"location":2,"content":"classic result is in terms of least"},{"from":2677.44,"to":2680.98,"location":2,"content":"squares error in estimation that this"},{"from":2680.98,"to":2684.25,"location":2,"content":"the product of these three things will"},{"from":2684.25,"to":2688.84,"location":2,"content":"give XK which is the best Kate rank K"},{"from":2688.84,"to":2691.81,"location":2,"content":"approximation to the original X in 
terms"},{"from":2691.81,"to":2694.72,"location":2,"content":"of a x squared least squares criterion"},{"from":2694.72,"to":2697.57,"location":2,"content":"so we could do this and we could build"},{"from":2697.57,"to":2703.06,"location":2,"content":"word vectors so I can make use of num"},{"from":2703.06,"to":2706.48,"location":2,"content":"PI's SVD function and I can throw into"},{"from":2706.48,"to":2708.21,"location":2,"content":"it"},{"from":2708.21,"to":2713.92,"location":2,"content":"matrices and I can make word vectors and"},{"from":2713.92,"to":2716.14,"location":2,"content":"these ones look really bad but hey I"},{"from":2716.14,"to":2718.65,"location":2,"content":"give it a data set of three sentences"},{"from":2718.65,"to":2721.6,"location":2,"content":"exactly a fair comparison but so this"},{"from":2721.6,"to":2726.49,"location":2,"content":"technique was in popularized around the"},{"from":2726.49,"to":2729.01,"location":2,"content":"term the turn of the millennium it"},{"from":2729.01,"to":2731.47,"location":2,"content":"generally went for some word"},{"from":2731.47,"to":2733.48,"location":2,"content":"applications under the name of latent"},{"from":2733.48,"to":2735.82,"location":2,"content":"semantic analysis or latent semantic"},{"from":2735.82,"to":2738.31,"location":2,"content":"indexing and the idea was that you would"},{"from":2738.31,"to":2741.34,"location":2,"content":"have these semantic directions that you"},{"from":2741.34,"to":2743.02,"location":2,"content":"were finding in this low dimensional"},{"from":2743.02,"to":2745.39,"location":2,"content":"space that had meaning and people worked"},{"from":2745.39,"to":2747.55,"location":2,"content":"with it quite a bit for techniques like"},{"from":2747.55,"to":2750.37,"location":2,"content":"do trying to do information retrieval"},{"from":2750.37,"to":2754.69,"location":2,"content":"using these LS a approximations and it's"},{"from":2754.69,"to":2757.99,"location":2,"content":"sort of worked a 
bit it kind of never"},{"from":2757.99,"to":2762.95,"location":2,"content":"really worked very well I think and so"},{"from":2762.95,"to":2767.27,"location":2,"content":"never sort of hugely caught on but it's"},{"from":2767.27,"to":2769.49,"location":2,"content":"so the myth it's kind of continued to be"},{"from":2769.49,"to":2771.74,"location":2,"content":"explored actually mainly in the sort of"},{"from":2771.74,"to":2774.5,"location":2,"content":"cog psyche of cogs site community where"},{"from":2774.5,"to":2776.27,"location":2,"content":"people were doing things with word"},{"from":2776.27,"to":2778.55,"location":2,"content":"meaning and there's this sort of kind of"},{"from":2778.55,"to":2779.96,"location":2,"content":"interesting"},{"from":2779.96,"to":2783.08,"location":2,"content":"lacunae to the literature that there was"},{"from":2783.08,"to":2787.43,"location":2,"content":"this guy Doug roadie who did a PhD at"},{"from":2787.43,"to":2792.74,"location":2,"content":"CMU in 2005 and basically what he"},{"from":2792.74,"to":2795.92,"location":2,"content":"discovered was look if rather than just"},{"from":2795.92,"to":2799.82,"location":2,"content":"using law counts I start doing quite a"},{"from":2799.82,"to":2803.24,"location":2,"content":"bit more in terms of you know fiddling"},{"from":2803.24,"to":2805.58,"location":2,"content":"with the counts I can start to produce"},{"from":2805.58,"to":2808.04,"location":2,"content":"results that are much better so rather"},{"from":2808.04,"to":2810.2,"location":2,"content":"than using war counts you have to do"},{"from":2810.2,"to":2811.76,"location":2,"content":"something to deal with those very"},{"from":2811.76,"to":2814.58,"location":2,"content":"high-frequency words so one idea is you"},{"from":2814.58,"to":2816.2,"location":2,"content":"could log scale them which is also"},{"from":2816.2,"to":2817.9,"location":2,"content":"commonly used in information 
retrieval"},{"from":2817.9,"to":2820.76,"location":2,"content":"another idea is you could just use"},{"from":2820.76,"to":2823.64,"location":2,"content":"something like a sealing function so you"},{"from":2823.64,"to":2826.76,"location":2,"content":"take the minimum of X comma T for T set"},{"from":2826.76,"to":2829.54,"location":2,"content":"and a some number like around a hundred"},{"from":2829.54,"to":2833.48,"location":2,"content":"he had he used the idea which was also"},{"from":2833.48,"to":2835.49,"location":2,"content":"another of the hex that was put into the"},{"from":2835.49,"to":2837.65,"location":2,"content":"word Tyvek was rather than just you"},{"from":2837.65,"to":2840.38,"location":2,"content":"treating the whole window the same that"},{"from":2840.38,"to":2842.84,"location":2,"content":"you should count words that are closer"},{"from":2842.84,"to":2845.48,"location":2,"content":"more so in word to vaca"},{"from":2845.48,"to":2847.88,"location":2,"content":"they sample closer words more commonly"},{"from":2847.88,"to":2850.7,"location":2,"content":"than further away words in his system"},{"from":2850.7,"to":2851.66,"location":2,"content":"you're sort of having to have a"},{"from":2851.66,"to":2854.54,"location":2,"content":"differential count for closer words etc"},{"from":2854.54,"to":2858.53,"location":2,"content":"and then compared to any of that rather"},{"from":2858.53,"to":2861.23,"location":2,"content":"than using counts at all he then started"},{"from":2861.23,"to":2864.23,"location":2,"content":"using Pearson correlations which helped"},{"from":2864.23,"to":2866.63,"location":2,"content":"except they're sometimes negative and he"},{"from":2866.63,"to":2870.77,"location":2,"content":"decided that it helped if you then got"},{"from":2870.77,"to":2873.74,"location":2,"content":"rid of the negative values so in in some"},{"from":2873.74,"to":2875.56,"location":2,"content":"sense this sounds like a bag of 
hacks"},{"from":2875.56,"to":2879.14,"location":2,"content":"but on the other hand he was able to"},{"from":2879.14,"to":2881.84,"location":2,"content":"show that you know these transformed"},{"from":2881.84,"to":2884.27,"location":2,"content":"counts could actually then give you very"},{"from":2884.27,"to":2886.67,"location":2,"content":"useful word vectors as I'm about to show"},{"from":2886.67,"to":2890.09,"location":2,"content":"and well we have to realize that"},{"from":2890.09,"to":2893.03,"location":2,"content":"actually in slightly different forms"},{"from":2893.03,"to":2895.1,"location":2,"content":"several of these exact same counts are"},{"from":2895.1,"to":2896.6,"location":2,"content":"actually being used in word today as"},{"from":2896.6,"to":2906.1,"location":2,"content":"well"},{"from":2906.1,"to":2909.71,"location":2,"content":"yeah so so that's Annie I'm about to"},{"from":2909.71,"to":2912.44,"location":2,"content":"show exactly that that's actually a"},{"from":2912.44,"to":2915.35,"location":2,"content":"really interesting little bit of the"},{"from":2915.35,"to":2920.57,"location":2,"content":"data so yeah what yeah so the the thing"},{"from":2920.57,"to":2923,"location":2,"content":"if you do that you not only get word"},{"from":2923,"to":2925.64,"location":2,"content":"similarities are pretty good let me show"},{"from":2925.64,"to":2929.3,"location":2,"content":"you this example which is cleaner so"},{"from":2929.3,"to":2933.65,"location":2,"content":"this the precise idea of evaluating with"},{"from":2933.65,"to":2936.23,"location":2,"content":"analogies was not something that had"},{"from":2936.23,"to":2937.73,"location":2,"content":"really been developed so that was"},{"from":2937.73,"to":2940.03,"location":2,"content":"actually something that marsh mica love"},{"from":2940.03,"to":2944.93,"location":2,"content":"suggested but actually dug roadie made"},{"from":2944.93,"to":2948.77,"location":2,"content":"this really interesting 
observation"},{"from":2948.77,"to":2952.49,"location":2,"content":"which was he said look once I do these"},{"from":2952.49,"to":2955.01,"location":2,"content":"kind of transformations to improve the"},{"from":2955.01,"to":2956.87,"location":2,"content":"semantic representation of my word"},{"from":2956.87,"to":2960.05,"location":2,"content":"vectors look this really interesting"},{"from":2960.05,"to":2963.8,"location":2,"content":"property emerges that what you find is"},{"from":2963.8,"to":2967.64,"location":2,"content":"that there are semantic vectors which"},{"from":2967.64,"to":2970.85,"location":2,"content":"are basically linear components in my"},{"from":2970.85,"to":2973.58,"location":2,"content":"carefully constructed space so here we"},{"from":2973.58,"to":2976.73,"location":2,"content":"have the sort of verb to the doer of the"},{"from":2976.73,"to":2980.6,"location":2,"content":"verb Direction drive driver clean"},{"from":2980.6,"to":2984.44,"location":2,"content":"janitor swim swimmer learn teach or"},{"from":2984.44,"to":2988.61,"location":2,"content":"teach teacher doctor trade priest pray I"},{"from":2988.61,"to":2990.53,"location":2,"content":"mean you know it's not exactly perfect"},{"from":2990.53,"to":2992.18,"location":2,"content":"you know there's a little bit of wiggle"},{"from":2992.18,"to":2994.91,"location":2,"content":"there right but you know roughly it's"},{"from":2994.91,"to":2996.92,"location":2,"content":"completely clear that there's sort of a"},{"from":2996.92,"to":2999.71,"location":2,"content":"direction in the space that corresponds"},{"from":2999.71,"to":3003.97,"location":2,"content":"to from a verb to the doer of a verb and"},{"from":3003.97,"to":3007.18,"location":2,"content":"yeah so he hem to scott he no one has"},{"from":3007.18,"to":3008.56,"location":2,"content":"thought of this idea of doing the"},{"from":3008.56,"to":3012.43,"location":2,"content":"analogies its tests but the thing 
in"},{"from":3012.43,"to":3015.43,"location":2,"content":"retrospect that's obvious is if you can"},{"from":3015.43,"to":3018.22,"location":2,"content":"construct a vector space that has this"},{"from":3018.22,"to":3021.82,"location":2,"content":"linearity property then you're"},{"from":3021.82,"to":3023.59,"location":2,"content":"definitely going to do well in analogies"},{"from":3023.59,"to":3025.87,"location":2,"content":"so effectively he had invented a vector"},{"from":3025.87,"to":3027.79,"location":2,"content":"space that do well in analogies because"},{"from":3027.79,"to":3030.33,"location":2,"content":"this means that you've got there"},{"from":3030.33,"to":3033.12,"location":2,"content":"direction which is the doer and then you"},{"from":3033.12,"to":3034.77,"location":2,"content":"can immediately say that's the doer"},{"from":3034.77,"to":3036.12,"location":2,"content":"vector which you can get from"},{"from":3036.12,"to":3038.19,"location":2,"content":"subtracting clean from Schwimmer and"},{"from":3038.19,"to":3040.77,"location":2,"content":"that's right so clean from janitor and"},{"from":3040.77,"to":3043.38,"location":2,"content":"then we can add it on to swim and we'll"},{"from":3043.38,"to":3046.2,"location":2,"content":"get somewhere close to swimmer so his"},{"from":3046.2,"to":3048.03,"location":2,"content":"space actually did do that"},{"from":3048.03,"to":3052.8,"location":2,"content":"and so this isn't so the mole in some"},{"from":3052.8,"to":3056.16,"location":2,"content":"sense is if you have if you kind of do"},{"from":3056.16,"to":3058.17,"location":2,"content":"carefully control accounts and so on"},{"from":3058.17,"to":3061.23,"location":2,"content":"that conventional methods can also give"},{"from":3061.23,"to":3064.68,"location":2,"content":"you good word vector spaces and I mean"},{"from":3064.68,"to":3066.18,"location":2,"content":"so that was actually the starting off"},{"from":3066.18,"to":3070.2,"location":2,"content":"point 
for our work on glove so that"},{"from":3070.2,"to":3071.64,"location":2,"content":"essentially there'd been these two"},{"from":3071.64,"to":3075.51,"location":2,"content":"schools of work there had been this"},{"from":3075.51,"to":3077.82,"location":2,"content":"school of work that had been explored more"},{"from":3077.82,"to":3080.52,"location":2,"content":"in cog sci than anywhere else which had"},{"from":3080.52,"to":3082.89,"location":2,"content":"been based on counting and transforming"},{"from":3082.89,"to":3085.77,"location":2,"content":"counts and you know it had some"},{"from":3085.77,"to":3088.11,"location":2,"content":"advantages or it seemed it had some"},{"from":3088.11,"to":3091.44,"location":2,"content":"advantages right that you're making sort"},{"from":3091.44,"to":3093.33,"location":2,"content":"of efficient use of statistics as you're"},{"from":3093.33,"to":3095.16,"location":2,"content":"using the global statistics of the whole"},{"from":3095.16,"to":3098.58,"location":2,"content":"matrix directly to estimate things and"},{"from":3098.58,"to":3101.88,"location":2,"content":"at that point up until then had really"},{"from":3101.88,"to":3103.56,"location":2,"content":"only been used to capture word"},{"from":3103.56,"to":3106.89,"location":2,"content":"similarity and a lot of it had suffered"},{"from":3106.89,"to":3109.65,"location":2,"content":"from disproportionate importance"},{"from":3109.65,"to":3112.71,"location":2,"content":"given to large counts but Doug Rohde had"},{"from":3112.71,"to":3114.45,"location":2,"content":"sort of started to show how to solve"},{"from":3114.45,"to":3116.94,"location":2,"content":"both of these problems and so on the"},{"from":3116.94,"to":3118.62,"location":2,"content":"other hand there'd been these neural"},{"from":3118.62,"to":3120.75,"location":2,"content":"network methods which are kind of direct"},{"from":3120.75,"to":3122.85,"location":2,"content":"prediction methods that we were 
defining"},{"from":3122.85,"to":3125.1,"location":2,"content":"that probability distribution and trying"},{"from":3125.1,"to":3127.5,"location":2,"content":"to predict the words that occur and they"},{"from":3127.5,"to":3130.5,"location":2,"content":"had some advantages right the fact that"},{"from":3130.5,"to":3132.72,"location":2,"content":"you're sampling means that you're not"},{"from":3132.72,"to":3133.86,"location":2,"content":"going to run out of memory"},{"from":3133.86,"to":3136.08,"location":2,"content":"hopefully I know we've had some memory"},{"from":3136.08,"to":3137.58,"location":2,"content":"problems with homework one"},{"from":3137.58,"to":3139.2,"location":2,"content":"but in principle you're not in as bad a"},{"from":3139.2,"to":3141.45,"location":2,"content":"memory position as if you have to"},{"from":3141.45,"to":3143.4,"location":2,"content":"construct a huge matrix because you're"},{"from":3143.4,"to":3146.25,"location":2,"content":"going linearly but you know since you're"},{"from":3146.25,"to":3148.59,"location":2,"content":"doing it sample by sample it's inefficient"},{"from":3148.59,"to":3153.96,"location":2,"content":"use of statistics okay and so but on the"},{"from":3153.96,"to":3155.55,"location":2,"content":"other hand Mikolov's work it"},{"from":3155.55,"to":3158.16,"location":2,"content":"performed perfectly oh not perfectly"},{"from":3158.16,"to":3161.31,"location":2,"content":"but really well so this sort of led"},{"from":3161.31,"to":3163.07,"location":2,"content":"into this work"},{"from":3163.07,"to":3165.77,"location":2,"content":"that Jeffrey Pennington Richard Socher"},{"from":3165.77,"to":3168.83,"location":2,"content":"and I did of can we sort of combine these"},{"from":3168.83,"to":3172.39,"location":2,"content":"ideas and sort of have some of the"},{"from":3172.39,"to":3176.05,"location":2,"content":"goodness of the neural net methods while"},{"from":3176.05,"to":3179.18,"location":2,"content":"trying to do things with 
some kind of"},{"from":3179.18,"to":3182.87,"location":2,"content":"count matrix and so in particular we"},{"from":3182.87,"to":3185,"location":2,"content":"wanted to get the result in a slightly"},{"from":3185,"to":3188.36,"location":2,"content":"less hacky way that you want to have"},{"from":3188.36,"to":3191.03,"location":2,"content":"components of meaning being linear"},{"from":3191.03,"to":3192.02,"location":2,"content":"operative"},{"from":3192.02,"to":3193.97,"location":2,"content":"linear operations in the vector space"},{"from":3193.97,"to":3196.25,"location":2,"content":"that they're just some effector you're"},{"from":3196.25,"to":3198.59,"location":2,"content":"adding or something like this and so the"},{"from":3198.59,"to":3200.93,"location":2,"content":"crucial observation of this model was"},{"from":3200.93,"to":3203.72,"location":2,"content":"that we could use ratios of"},{"from":3203.72,"to":3206.03,"location":2,"content":"co-occurrence probabilities to encode"},{"from":3206.03,"to":3208.7,"location":2,"content":"meaning components and so the idea here"},{"from":3208.7,"to":3212.24,"location":2,"content":"is if you have a word like ice and you"},{"from":3212.24,"to":3213.92,"location":2,"content":"say how often the things going to"},{"from":3213.92,"to":3215.18,"location":2,"content":"co-occur with that"},{"from":3215.18,"to":3218.03,"location":2,"content":"well solid should co-occur a lot and gas"},{"from":3218.03,"to":3221,"location":2,"content":"should end but well water is also going"},{"from":3221,"to":3223.85,"location":2,"content":"to co-occur a lot and some random word"},{"from":3223.85,"to":3229.91,"location":2,"content":"won't occur much if you have oops if you"},{"from":3229.91,"to":3233.72,"location":2,"content":"have steam you get the opposite pattern"},{"from":3233.72,"to":3237.08,"location":2,"content":"with solid and gas right but so the"},{"from":3237.08,"to":3240.05,"location":2,"content":"thing to notice is it's not enough 
to"},{"from":3240.05,"to":3242.15,"location":2,"content":"just have large by itself because large"},{"from":3242.15,"to":3244.82,"location":2,"content":"appears both here and here or small"},{"from":3244.82,"to":3246.92,"location":2,"content":"appears there and there the thing that's"},{"from":3246.92,"to":3248.75,"location":2,"content":"interesting is sort of the difference"},{"from":3248.75,"to":3250.91,"location":2,"content":"between these components and they're"},{"from":3250.91,"to":3253.46,"location":2,"content":"indicating a meaning component and so we"},{"from":3253.46,"to":3257.21,"location":2,"content":"can get at that if we look at the ratio"},{"from":3257.21,"to":3260.66,"location":2,"content":"of co-occurrence probabilities and so"},{"from":3260.66,"to":3262.54,"location":2,"content":"for the ratio co-occurrence"},{"from":3262.54,"to":3265.76,"location":2,"content":"probabilities this is a dimension of"},{"from":3265.76,"to":3269.96,"location":2,"content":"meaning and where for other words this"},{"from":3269.96,"to":3272.66,"location":2,"content":"sort of ratio cancels out to about one"},{"from":3272.66,"to":3276.02,"location":2,"content":"and so in this slide I've moved so it's"},{"from":3276.02,"to":3278.81,"location":2,"content":"not my small and large but these are"},{"from":3278.81,"to":3280.94,"location":2,"content":"actually actual counts from a corpus so"},{"from":3280.94,"to":3283.22,"location":2,"content":"we roughly get dimension of meaning"},{"from":3283.22,"to":3285.56,"location":2,"content":"between solid and gas are the ones"},{"from":3285.56,"to":3287.57,"location":2,"content":"coming out is about one because they're"},{"from":3287.57,"to":3290.81,"location":2,"content":"not the dimension of meaning and so it"},{"from":3290.81,"to":3292.73,"location":2,"content":"seems like what we want is we want to"},{"from":3292.73,"to":3294.86,"location":2,"content":"have ratio of 
co-occurrence"},{"from":3294.86,"to":3296.54,"location":2,"content":"probabilities"},{"from":3296.54,"to":3299.21,"location":2,"content":"come linear in our space and then we're"},{"from":3299.21,"to":3301.82,"location":2,"content":"in a good business and so that's what we"},{"from":3301.82,"to":3304.37,"location":2,"content":"want to set about doing well how can you"},{"from":3304.37,"to":3307.48,"location":2,"content":"do that well the way you can do that is"},{"from":3307.48,"to":3311.06,"location":2,"content":"by if you can make the dot products"},{"from":3311.06,"to":3314.39,"location":2,"content":"equal to the log of the co-occurrence"},{"from":3314.39,"to":3318.02,"location":2,"content":"probability then immediately you get the"},{"from":3318.02,"to":3319.97,"location":2,"content":"fact that when you have a vector"},{"from":3319.97,"to":3324.02,"location":2,"content":"difference it turns into a ratio of the"},{"from":3324.02,"to":3327.59,"location":2,"content":"co-occurrence probabilities and so"},{"from":3327.59,"to":3330.02,"location":2,"content":"essentially the whole of the model is"},{"from":3330.02,"to":3332.03,"location":2,"content":"that we want to have dot products the"},{"from":3332.03,"to":3334.31,"location":2,"content":"logs of co-occurrence probabilities and"},{"from":3334.31,"to":3337.73,"location":2,"content":"so that's what we do so here is our"},{"from":3337.73,"to":3341.21,"location":2,"content":"objective function here and it's made to"},{"from":3341.21,"to":3343.55,"location":2,"content":"look a little bit more complicated but"},{"from":3343.55,"to":3346.16,"location":2,"content":"essentially we've got the squared loss"},{"from":3346.16,"to":3349.67,"location":2,"content":"here and then we're wanting to say the"},{"from":3349.67,"to":3351.8,"location":2,"content":"dot product should be as similar as"},{"from":3351.8,"to":3354.95,"location":2,"content":"possible to the log of the 
co-occurrence"},{"from":3354.95,"to":3357.35,"location":2,"content":"probability and so you'll there'll be"},{"from":3357.35,"to":3359.87,"location":2,"content":"loss to the extent that they're not the"},{"from":3359.87,"to":3362.6,"location":2,"content":"same but we kind of complexified a"},{"from":3362.6,"to":3365.48,"location":2,"content":"little by putting in bias terms for"},{"from":3365.48,"to":3368.03,"location":2,"content":"both of the two words because maybe the"},{"from":3368.03,"to":3370.16,"location":2,"content":"word is just overall common and likes to"},{"from":3370.16,"to":3373.28,"location":2,"content":"co-occur things or uncommon or doesn't"},{"from":3373.28,"to":3375.74,"location":2,"content":"and then we do one more little trick"},{"from":3375.74,"to":3377.48,"location":2,"content":"because everyone does tricks to make"},{"from":3377.48,"to":3379.76,"location":2,"content":"the performance better is that we also"},{"from":3379.76,"to":3383.48,"location":2,"content":"use this F function in front so that"},{"from":3383.48,"to":3385.31,"location":2,"content":"we're sort of capping the effect that"},{"from":3385.31,"to":3388.34,"location":2,"content":"very common word pairs can have on the"},{"from":3388.34,"to":3391.76,"location":2,"content":"performance of the system okay and so"},{"from":3391.76,"to":3394.04,"location":2,"content":"that gave us the glove model of word"},{"from":3394.04,"to":3398.96,"location":2,"content":"vectors and theoretically the interest"},{"from":3398.96,"to":3401.51,"location":2,"content":"of this was you know a lot of the"},{"from":3401.51,"to":3403.34,"location":2,"content":"preceding literature had been there'd been"},{"from":3403.34,"to":3404.87,"location":2,"content":"these count methods and there'd been"},{"from":3404.87,"to":3407.39,"location":2,"content":"these prediction methods and the hope"},{"from":3407.39,"to":3409.67,"location":2,"content":"was that this could sort of unify 
the"},{"from":3409.67,"to":3412.19,"location":2,"content":"two by showing you how you could have a"},{"from":3412.19,"to":3416,"location":2,"content":"method that is estimated simply off a"},{"from":3416,"to":3418.16,"location":2,"content":"count matrix but it's done in the same"},{"from":3418.16,"to":3420.95,"location":2,"content":"kind of iterative loss based estimation"},{"from":3420.95,"to":3422.75,"location":2,"content":"method that's used for the neural methods"},{"from":3422.75,"to":3425.78,"location":2,"content":"to get good word vectors and this also"},{"from":3425.78,"to":3427.49,"location":2,"content":"worked to give good word vectors so"},{"from":3427.49,"to":3430.31,"location":2,"content":"here's glove results for the word frog"},{"from":3430.31,"to":3434.33,"location":2,"content":"and frogs and toad are obvious but you"},{"from":3434.33,"to":3436.33,"location":2,"content":"know these different kinds of words"},{"from":3436.33,"to":3439.1,"location":2,"content":"various kinds of pretty tree frogs and"},{"from":3439.1,"to":3441.16,"location":2,"content":"things like that"},{"from":3441.16,"to":3445.91,"location":2,"content":"okay so I'll then go from here and say a"},{"from":3445.91,"to":3448.58,"location":2,"content":"little bit more about some of the work"},{"from":3448.58,"to":3451.67,"location":2,"content":"on evaluating word vectors and this is"},{"from":3451.67,"to":3453.44,"location":2,"content":"maybe also a chance just talking a"},{"from":3453.44,"to":3455.83,"location":2,"content":"little bit about evaluation altogether"},{"from":3455.83,"to":3459.89,"location":2,"content":"so normally in NLP when we do evaluation"},{"from":3459.89,"to":3461.81,"location":2,"content":"the first thing that comes up is"},{"from":3461.81,"to":3465.56,"location":2,"content":"intrinsic versus extrinsic evaluation so"},{"from":3465.56,"to":3466.94,"location":2,"content":"normally if there's something 
we're"},{"from":3466.94,"to":3471.38,"location":2,"content":"trying to do like model words similarity"},{"from":3471.38,"to":3475.34,"location":2,"content":"with word vectors or we're trying to put"},{"from":3475.34,"to":3478.07,"location":2,"content":"parts of speech on words or something we"},{"from":3478.07,"to":3480.56,"location":2,"content":"can just have an intrinsic evaluation of"},{"from":3480.56,"to":3482.93,"location":2,"content":"saying how good a job did you get how"},{"from":3482.93,"to":3484.7,"location":2,"content":"you guessing the right part of speech"},{"from":3484.7,"to":3487.01,"location":2,"content":"are you putting synonyms close together"},{"from":3487.01,"to":3490.46,"location":2,"content":"and that's sort of normally very easy to"},{"from":3490.46,"to":3493.22,"location":2,"content":"do and faster compute and it's useful to"},{"from":3493.22,"to":3495.59,"location":2,"content":"do because it helps us understand the"},{"from":3495.59,"to":3498.05,"location":2,"content":"system on the other hand a lot of the"},{"from":3498.05,"to":3500.57,"location":2,"content":"time those intrinsic evaluations it's"},{"from":3500.57,"to":3503.9,"location":2,"content":"not very clear where whether having done"},{"from":3503.9,"to":3506.21,"location":2,"content":"well on that task is really going to"},{"from":3506.21,"to":3508.85,"location":2,"content":"help us build the amazing natural"},{"from":3508.85,"to":3511.01,"location":2,"content":"language understanding robots that we so"},{"from":3511.01,"to":3514.91,"location":2,"content":"ardently desire so people are also very"},{"from":3514.91,"to":3517.58,"location":2,"content":"interested in extrinsic evaluations and"},{"from":3517.58,"to":3520.28,"location":2,"content":"so extrinsic Lee is then saying well"},{"from":3520.28,"to":3523.88,"location":2,"content":"suppose you use this new stuff in a real"},{"from":3523.88,"to":3527.03,"location":2,"content":"system doesn't make performance go 
up"},{"from":3527.03,"to":3529.79,"location":2,"content":"and it's then sort of definitional what"},{"from":3529.79,"to":3532.13,"location":2,"content":"counts to you as a real system but"},{"from":3532.13,"to":3533.99,"location":2,"content":"normally that's meaning it's some"},{"from":3533.99,"to":3536.33,"location":2,"content":"application that human beings actually"},{"from":3536.33,"to":3539.24,"location":2,"content":"care about and like to use so that's"},{"from":3539.24,"to":3541.48,"location":2,"content":"something like web search or"},{"from":3541.48,"to":3542.63,"location":2,"content":"question-answering"},{"from":3542.63,"to":3545.45,"location":2,"content":"or a phone dialogue system or something"},{"from":3545.45,"to":3548.03,"location":2,"content":"like that that you can put it into that"},{"from":3548.03,"to":3551.87,"location":2,"content":"system and the numbers get go up so that"},{"from":3551.87,"to":3553.7,"location":2,"content":"seems what you want to do you want to"},{"from":3553.7,"to":3555.53,"location":2,"content":"have stuff that works in real tasks of"},{"from":3555.53,"to":3557.9,"location":2,"content":"course there are sort of on the other"},{"from":3557.9,"to":3559.67,"location":2,"content":"hand a lot of things are a lot harder"},{"from":3559.67,"to":3563.18,"location":2,"content":"then so it's much more work to do such"},{"from":3563.18,"to":3564.1,"location":2,"content":"an evaluate"},{"from":3564.1,"to":3566.47,"location":2,"content":"and to run different variants of a"},{"from":3566.47,"to":3570.16,"location":2,"content":"system and even when the results are"},{"from":3570.16,"to":3573.58,"location":2,"content":"poor or great sometimes it's hard to"},{"from":3573.58,"to":3574.51,"location":2,"content":"diagnose"},{"from":3574.51,"to":3576.88,"location":2,"content":"you know if your great new word vectors"},{"from":3576.88,"to":3579.04,"location":2,"content":"don't work better in the system you 
know"},{"from":3579.04,"to":3580.81,"location":2,"content":"it might be for sort of some extraneous"},{"from":3580.81,"to":3582.82,"location":2,"content":"reason about how the system was built"},{"from":3582.82,"to":3584.86,"location":2,"content":"it's sort of hiding all your magic and"},{"from":3584.86,"to":3586.36,"location":2,"content":"if you just change the rest of the"},{"from":3586.36,"to":3588.82,"location":2,"content":"system and suddenly show it's good"},{"from":3588.82,"to":3591.61,"location":2,"content":"effect so it's kind of hard to do sort"},{"from":3591.61,"to":3597,"location":2,"content":"of apportionment of goodness and badness"},{"from":3597,"to":3600.52,"location":2,"content":"okay so um so today I'm mainly going to"},{"from":3600.52,"to":3601.87,"location":2,"content":"say a little bit more about these"},{"from":3601.87,"to":3604.78,"location":2,"content":"intrinsic word vector evaluations that"},{"from":3604.78,"to":3607,"location":2,"content":"we've talked about so we've talked quite"},{"from":3607,"to":3610.3,"location":2,"content":"a bit about these analogies so if we're"},{"from":3610.3,"to":3612.55,"location":2,"content":"actually working out the analogies it"},{"from":3612.55,"to":3614.62,"location":2,"content":"turns out that normally what people are"},{"from":3614.62,"to":3617.56,"location":2,"content":"doing is working out a cosine distance"},{"from":3617.56,"to":3621.61,"location":2,"content":"and angle between different word"},{"from":3621.61,"to":3625.27,"location":2,"content":"candidates to work out which is the word"},{"from":3625.27,"to":3627.58,"location":2,"content":"that solves the analogy which is another"},{"from":3627.58,"to":3629.92,"location":2,"content":"little tiny wrinkle of difference there"},{"from":3629.92,"to":3632.38,"location":2,"content":"and there's also one other trick that"},{"from":3632.38,"to":3634.66,"location":2,"content":"people commonly use they forbid 
the"},{"from":3634.66,"to":3636.88,"location":2,"content":"system from returning one of the three"},{"from":3636.88,"to":3641.86,"location":2,"content":"words you put into the analogy okay but"},{"from":3641.86,"to":3644.05,"location":2,"content":"nevertheless so this is something that"},{"from":3644.05,"to":3645.97,"location":2,"content":"you can evaluate here are now some"},{"from":3645.97,"to":3648.91,"location":2,"content":"glove visualizations and so these"},{"from":3648.91,"to":3651.34,"location":2,"content":"glove visualizations show exactly the"},{"from":3651.34,"to":3654.19,"location":2,"content":"same kind of linearity property that"},{"from":3654.19,"to":3656.35,"location":2,"content":"Doug Rohde had discovered which means"},{"from":3656.35,"to":3658.51,"location":2,"content":"that analogies work sort of by"},{"from":3658.51,"to":3660.16,"location":2,"content":"construction because our vector space"},{"from":3660.16,"to":3662.95,"location":2,"content":"wanted to make meaning components linear"},{"from":3662.95,"to":3667.81,"location":2,"content":"so this is then showing a gender display"},{"from":3667.81,"to":3670.36,"location":2,"content":"this is showing one between companies"},{"from":3670.36,"to":3674.29,"location":2,"content":"and their CEOs kind of cool and you can"},{"from":3674.29,"to":3676.54,"location":2,"content":"also do more syntactic facts so this is"},{"from":3676.54,"to":3679.09,"location":2,"content":"showing positive comparative and"},{"from":3679.09,"to":3682.12,"location":2,"content":"superlative of adjectives yes so"},{"from":3682.12,"to":3684.19,"location":2,"content":"Tomas Mikolov came up with this"},{"from":3684.19,"to":3689.11,"location":2,"content":"idea of doing these analogy tasks and so"},{"from":3689.11,"to":3691.48,"location":2,"content":"he built a data set with a lot of"},{"from":3691.48,"to":3694.09,"location":2,"content":"analogies in it it's sort of a it's 
a"},{"from":3694.09,"to":3696.16,"location":2,"content":"bit of a weirdo data set because it sort"},{"from":3696.16,"to":3697.08,"location":2,"content":"of tests"},{"from":3697.08,"to":3699.12,"location":2,"content":"few random different things which may"},{"from":3699.12,"to":3700.89,"location":2,"content":"have been things that his system worked"},{"from":3700.89,"to":3704.52,"location":2,"content":"well on but you know it test countries"},{"from":3704.52,"to":3710.25,"location":2,"content":"and capitals country you know cities and"},{"from":3710.25,"to":3712.92,"location":2,"content":"states countries and currencies so"},{"from":3712.92,"to":3714.78,"location":2,"content":"they're a bunch of semantic things that"},{"from":3714.78,"to":3719.13,"location":2,"content":"tests and then there are some syntactic"},{"from":3719.13,"to":3721.76,"location":2,"content":"things that tastes so bad worst fast"},{"from":3721.76,"to":3724.98,"location":2,"content":"fastest for superlatives but you know"},{"from":3724.98,"to":3727.23,"location":2,"content":"even some of the ones are showing before"},{"from":3727.23,"to":3729.99,"location":2,"content":"you know there's no there's no Obama is"},{"from":3729.99,"to":3732.69,"location":2,"content":"too Clinton kind of ones that are"},{"from":3732.69,"to":3736.8,"location":2,"content":"actually in this evaluation set here's a"},{"from":3736.8,"to":3740.01,"location":2,"content":"big table of results that comes from our"},{"from":3740.01,"to":3742.08,"location":2,"content":"glove paper so not surprisingly the"},{"from":3742.08,"to":3744.15,"location":2,"content":"glove paper perform best in this"},{"from":3744.15,"to":3748.31,"location":2,"content":"evaluation because there was our paper"},{"from":3748.31,"to":3751.89,"location":2,"content":"but I mean perhaps you know perhaps the"},{"from":3751.89,"to":3754.89,"location":2,"content":"things to start to notice is yeah if you"},{"from":3754.89,"to":3757.86,"location":2,"content":"just 
do a plain SVD on counts you know"},{"from":3757.86,"to":3761.73,"location":2,"content":"that that works abominably badly for"},{"from":3761.73,"to":3765.15,"location":2,"content":"these analogy tasks but you know kind of"},{"from":3765.15,"to":3767.88,"location":2,"content":"as Doug Rohde showed if you start then"},{"from":3767.88,"to":3771.66,"location":2,"content":"doing manipulations of the count matrix"},{"from":3771.66,"to":3774.54,"location":2,"content":"before you do an SVD you can actually"},{"from":3774.54,"to":3777.24,"location":2,"content":"start to produce an SVD based system"},{"from":3777.24,"to":3779.85,"location":2,"content":"that actually performs quite well on"},{"from":3779.85,"to":3783.51,"location":2,"content":"these tasks you know not badly against"},{"from":3783.51,"to":3787.29,"location":2,"content":"other things other things that you will"},{"from":3787.29,"to":3789.03,"location":2,"content":"discover right at the top there are a"},{"from":3789.03,"to":3790.77,"location":2,"content":"hundred dimensional ones and at the"},{"from":3790.77,"to":3792.57,"location":2,"content":"bottom there are some thousand"},{"from":3792.57,"to":3794.16,"location":2,"content":"dimensional ones and other three hundred"},{"from":3794.16,"to":3796.08,"location":2,"content":"dimensional ones at least when you're"},{"from":3796.08,"to":3797.91,"location":2,"content":"training on a big amount of text bigger"},{"from":3797.91,"to":3800.25,"location":2,"content":"dimensionality definitely works better"},{"from":3800.25,"to":3801.78,"location":2,"content":"and I'll come back to that in a minute"},{"from":3801.78,"to":3804.93,"location":2,"content":"the amount of text makes a difference as"},{"from":3804.93,"to":3807.57,"location":2,"content":"well right so we're going up from sort"},{"from":3807.57,"to":3809.85,"location":2,"content":"of one to 1.5 billion words the"},{"from":3809.85,"to":3812.07,"location":2,"content":"beginning to these ones down here 
being"},{"from":3812.07,"to":3814.2,"location":2,"content":"trained over 42 billion words of text"},{"from":3814.2,"to":3817.14,"location":2,"content":"and perhaps unsurprisingly the 42"},{"from":3817.14,"to":3820.58,"location":2,"content":"billion words of text ones work better"},{"from":3820.58,"to":3823.86,"location":2,"content":"so it's big data here are a couple more"},{"from":3823.86,"to":3826.56,"location":2,"content":"steps from this paper so this is a graph"},{"from":3826.56,"to":3828.99,"location":2,"content":"of dimensionality and what the"},{"from":3828.99,"to":3830.5,"location":2,"content":"performance is so if"},{"from":3830.5,"to":3832.87,"location":2,"content":"the three lines the green ones semantics"},{"from":3832.87,"to":3834.94,"location":2,"content":"the blue ones the syntactic analogies"},{"from":3834.94,"to":3838.3,"location":2,"content":"and so Reds the overall score so sort of"},{"from":3838.3,"to":3840.73,"location":2,"content":"what you see is up to dimensionality"},{"from":3840.73,"to":3842.83,"location":2,"content":"three hundred things are clearly"},{"from":3842.83,"to":3845.02,"location":2,"content":"increasing quite a bit and then it gets"},{"from":3845.02,"to":3847.33,"location":2,"content":"fairly flat which is precisely why you"},{"from":3847.33,"to":3849.88,"location":2,"content":"find a lot of word vectors that are have"},{"from":3849.88,"to":3852.73,"location":2,"content":"to mention how these three hundred this"},{"from":3852.73,"to":3855.88,"location":2,"content":"one's showing what window size so this"},{"from":3855.88,"to":3857.14,"location":2,"content":"is sort of what we talked about"},{"from":3857.14,"to":3860.74,"location":2,"content":"symmetric on both sides window size and"},{"from":3860.74,"to":3864.13,"location":2,"content":"as it goes from 2 4 6 8 10 and so what"},{"from":3864.13,"to":3866.38,"location":2,"content":"you see is if you use a very small"},{"from":3866.38,"to":3869.86,"location":2,"content":"window 
like - that actually works that"},{"from":3869.86,"to":3873.1,"location":2,"content":"the the syntactic prediction is stronger"},{"from":3873.1,"to":3874.69,"location":2,"content":"because a lot of syntactic effects are"},{"from":3874.69,"to":3877.51,"location":2,"content":"very local whereas as you go out the"},{"from":3877.51,"to":3879.4,"location":2,"content":"semantic prediction gets better and"},{"from":3879.4,"to":3879.73,"location":2,"content":"better"},{"from":3879.73,"to":3881.59,"location":2,"content":"actually this syntactic gets a bit"},{"from":3881.59,"to":3883.27,"location":2,"content":"better as well but it's especially the"},{"from":3883.27,"to":3886.21,"location":2,"content":"semantics the gains the right graph"},{"from":3886.21,"to":3888.73,"location":2,"content":"shows that if you only use context on"},{"from":3888.73,"to":3894.13,"location":2,"content":"one side your numbers aren't as good ok"},{"from":3894.13,"to":3896.74,"location":2,"content":"so I sort of just wanted to sort of"},{"from":3896.74,"to":3900.81,"location":2,"content":"sneak in a little cameos of a couple of"},{"from":3900.81,"to":3903.85,"location":2,"content":"recent bits of work how sort of a first"},{"from":3903.85,"to":3906.19,"location":2,"content":"of what things people are doing with"},{"from":3906.19,"to":3910.6,"location":2,"content":"word vectors so this one was actually by"},{"from":3910.6,"to":3914.02,"location":2,"content":"two standard people now the best this"},{"from":3914.02,"to":3915.58,"location":2,"content":"would be the best story if I could say"},{"from":3915.58,"to":3918.55,"location":2,"content":"that this was a final project in this"},{"from":3918.55,"to":3920.32,"location":2,"content":"class last year and I for sure that's"},{"from":3920.32,"to":3921.01,"location":2,"content":"not true"},{"from":3921.01,"to":3922.72,"location":2,"content":"this paper has nothing to do with this"},{"from":3922.72,"to":3931.05,"location":2,"content":"class but in here a 
Zi Yin and Yuanyuan Shen"},{"from":3931.05,"to":3935.44,"location":2,"content":"actually had some sort of clever and"},{"from":3935.44,"to":3938.59,"location":2,"content":"very mathy ideas where they are using"},{"from":3938.59,"to":3942.37,"location":2,"content":"matrix perturbation theory and sort of"},{"from":3942.37,"to":3945.4,"location":2,"content":"showing how dimensionality in word"},{"from":3945.4,"to":3947.5,"location":2,"content":"vectors actually sort of feeds into the"},{"from":3947.5,"to":3949.6,"location":2,"content":"bias-variance tradeoff if you've seen"},{"from":3949.6,"to":3952.51,"location":2,"content":"that in other parts of machine learning"},{"from":3952.51,"to":3954.25,"location":2,"content":"and I'm not even going to attempt to"},{"from":3954.25,"to":3957.7,"location":2,"content":"explain their paper but here it is that"},{"from":3957.7,"to":3959.08,"location":2,"content":"they did really well with this paper"},{"from":3959.08,"to":3961.36,"location":2,"content":"they got an oral talk at NeurIPS from it"},{"from":3961.36,"to":3962.81,"location":2,"content":"and"},{"from":3962.81,"to":3964.07,"location":2,"content":"but there's sort of an interesting"},{"from":3964.07,"to":3966.98,"location":2,"content":"result of what you see with these word"},{"from":3966.98,"to":3969.05,"location":2,"content":"vectors which is in a way kind of"},{"from":3969.05,"to":3971.93,"location":2,"content":"surprising so this is showing doing word"},{"from":3971.93,"to":3976.1,"location":2,"content":"vector dimensions from 0 up to 10,000"},{"from":3976.1,"to":3978.08,"location":2,"content":"sorry going way higher than we talked"},{"from":3978.08,"to":3980.57,"location":2,"content":"about before and so what you discover"},{"from":3980.57,"to":3983.63,"location":2,"content":"which people have known for ages is that"},{"from":3983.63,"to":3985.16,"location":2,"content":"there's sort of a little blip that"},{"from":3985.16,"to":3987.2,"location":2,"content":"somewhere around 
two or three hundred"},{"from":3987.2,"to":3989.98,"location":2,"content":"which seems to optimize performance"},{"from":3989.98,"to":3993.41,"location":2,"content":"abuse those sizes but the thing that"},{"from":3993.41,"to":3995.03,"location":2,"content":"they were sort of doing a lot of their"},{"from":3995.03,"to":3996.77,"location":2,"content":"theory about and it's kind of surprising"},{"from":3996.77,"to":4000.4,"location":2,"content":"is well surely if you have a humongous"},{"from":4000.4,"to":4002.83,"location":2,"content":"humongous number likes if you're using"},{"from":4002.83,"to":4006.52,"location":2,"content":"ten thousand dimensional vectors you"},{"from":4006.52,"to":4009.1,"location":2,"content":"know you're trying to estimate another"},{"from":4009.1,"to":4012.07,"location":2,"content":"two orders of magnitude more numbers for"},{"from":4012.07,"to":4014.74,"location":2,"content":"every word surely things should just"},{"from":4014.74,"to":4017.74,"location":2,"content":"fall apart because you've got hopelessly"},{"from":4017.74,"to":4020.32,"location":2,"content":"many parameters relative to the amount"},{"from":4020.32,"to":4022.18,"location":2,"content":"of training data that you're trying to"},{"from":4022.18,"to":4024.58,"location":2,"content":"estimate these numbers from and so the"},{"from":4024.58,"to":4026.71,"location":2,"content":"interesting result that they show is"},{"from":4026.71,"to":4031.06,"location":2,"content":"that things don't fall apart and that"},{"from":4031.06,"to":4033.9,"location":2,"content":"you can essentially go out to these huge"},{"from":4033.9,"to":4036.13,"location":2,"content":"dimensionalities and the performance"},{"from":4036.13,"to":4038.44,"location":2,"content":"stays flat and that they've got a lot of"},{"from":4038.44,"to":4041.68,"location":2,"content":"theory sort of for predicting why that's"},{"from":4041.68,"to":4043.09,"location":2,"content":"actually going to end up to being 
the"},{"from":4043.09,"to":4048.04,"location":2,"content":"case yeah so for training these models"},{"from":4048.04,"to":4050.83,"location":2,"content":"iteratively this is quick orange is"},{"from":4050.83,"to":4054.58,"location":2,"content":"showing Glove training you know they"},{"from":4054.58,"to":4056.56,"location":2,"content":"keep on getting better for a while so"},{"from":4056.56,"to":4059.53,"location":2,"content":"you know just go out go sleep see in the"},{"from":4059.53,"to":4061.42,"location":2,"content":"morning how it's doing right so that if"},{"from":4061.42,"to":4063.82,"location":2,"content":"you're running it for 24 hours your"},{"from":4063.82,"to":4065.8,"location":2,"content":"numbers are better than if you only ran"},{"from":4065.8,"to":4068.68,"location":2,"content":"it for six hours and that's true for a"},{"from":4068.68,"to":4072.1,"location":2,"content":"lot of deep learning model sorry so this"},{"from":4072.1,"to":4074.83,"location":2,"content":"is the key reason why you don't want to"},{"from":4074.83,"to":4076.84,"location":2,"content":"start your assignment the night before"},{"from":4076.84,"to":4080.05,"location":2,"content":"it's due because even if you program it"},{"from":4080.05,"to":4082.15,"location":2,"content":"perfectly you might just not have enough"},{"from":4082.15,"to":4084.85,"location":2,"content":"time for it to run so that you produce"},{"from":4084.85,"to":4090.34,"location":2,"content":"good numbers at the end of it okay"},{"from":4090.34,"to":4097.23,"location":2,"content":"yeah so so a couple more things on that"},{"from":4097.23,"to":4101.83,"location":2,"content":"yeah so what are we showing here so"},{"from":4101.83,"to":4104.53,"location":2,"content":"these are again semantic syntactic and"},{"from":4104.53,"to":4106.99,"location":2,"content":"overall numbers so there's sort of two"},{"from":4106.99,"to":4109,"location":2,"content":"things that are so being mixed 
together"},{"from":4109,"to":4111.76,"location":2,"content":"here one is if we just look at the"},{"from":4111.76,"to":4114.69,"location":2,"content":"overall numbers they're highest over here"},{"from":4114.69,"to":4117.91,"location":2,"content":"which is this forty two billion Common"},{"from":4117.91,"to":4120.52,"location":2,"content":"Crawl web pages corpus that gives us the"},{"from":4120.52,"to":4122.71,"location":2,"content":"highest overall number but there's sort"},{"from":4122.71,"to":4124.72,"location":2,"content":"of something else that's interesting in"},{"from":4124.72,"to":4129.07,"location":2,"content":"this graph which is that using Wikipedia"},{"from":4129.07,"to":4132.67,"location":2,"content":"works freakily well so that you actually"},{"from":4132.67,"to":4135.43,"location":2,"content":"find that 1.6 billion tokens of"},{"from":4135.43,"to":4139.3,"location":2,"content":"Wikipedia works better than 4.3 billion"},{"from":4139.3,"to":4142.84,"location":2,"content":"tokens of newswire newspaper article"},{"from":4142.84,"to":4146.41,"location":2,"content":"data and so I think that sort of"},{"from":4146.41,"to":4149.11,"location":2,"content":"actually makes sense which is well you"},{"from":4149.11,"to":4151.54,"location":2,"content":"know the job of encyclopedias is to sort"},{"from":4151.54,"to":4153.49,"location":2,"content":"of explain concepts and how they relate"},{"from":4153.49,"to":4155.73,"location":2,"content":"to each other right so that"},{"from":4155.73,"to":4157.99,"location":2,"content":"encyclopedias are just much more expository"},{"from":4157.99,"to":4160.81,"location":2,"content":"texts that show all the connections"},{"from":4160.81,"to":4163.78,"location":2,"content":"between things whereas newspapers in"},{"from":4163.78,"to":4166.69,"location":2,"content":"general aren't trying to expose it how"},{"from":4166.69,"to":4168.31,"location":2,"content":"things fit together they're just 
telling"},{"from":4168.31,"to":4170.38,"location":2,"content":"you about you know who got shot dead"},{"from":4170.38,"to":4172.48,"location":2,"content":"last night or something like that"},{"from":4172.48,"to":4175.69,"location":2,"content":"right so so there's this sort of"},{"from":4175.69,"to":4177.04,"location":2,"content":"interesting fact"},{"from":4177.04,"to":4179.71,"location":2,"content":"but this Wikipedia data kind of really"},{"from":4179.71,"to":4183.87,"location":2,"content":"it sort of is differentially useful for"},{"from":4183.87,"to":4187.33,"location":2,"content":"making word vectors and you know in fact"},{"from":4187.33,"to":4190.69,"location":2,"content":"you know when we did very well with our"},{"from":4190.69,"to":4193,"location":2,"content":"glove word vectors and lots of people"},{"from":4193,"to":4195.16,"location":2,"content":"use those you know I think actually one"},{"from":4195.16,"to":4197.47,"location":2,"content":"of the reasons why they work so well is"},{"from":4197.47,"to":4199.87,"location":2,"content":"that the original word2vec vectors"},{"from":4199.87,"to":4202.42,"location":2,"content":"that Google distributes are built only on"},{"from":4202.42,"to":4205.12,"location":2,"content":"Google News data whereas ours sort of have"},{"from":4205.12,"to":4209.97,"location":2,"content":"this Wikipedia data inside them okay"},{"from":4209.97,"to":4213.22,"location":2,"content":"rushing ahead yeah so that there's all"},{"from":4213.22,"to":4215.05,"location":2,"content":"the work on analogy but the other more"},{"from":4215.05,"to":4217.75,"location":2,"content":"basic evaluation is this one of"},{"from":4217.75,"to":4220.51,"location":2,"content":"capturing similarity judgment and I"},{"from":4220.51,"to":4222.85,"location":2,"content":"haven't said much about this but you"},{"from":4222.85,"to":4224.23,"location":2,"content":"know there's this sort of"},{"from":4224.23,"to":4227.2,"location":2,"content":"of sub literature in the 
psychology"},{"from":4227.2,"to":4230.08,"location":2,"content":"community where people have wanted to"},{"from":4230.08,"to":4233.38,"location":2,"content":"model humans judgments of similarity so"},{"from":4233.38,"to":4236.23,"location":2,"content":"like a good psyche person what you do is"},{"from":4236.23,"to":4238.54,"location":2,"content":"you find your classroom of Psych one"},{"from":4238.54,"to":4240.94,"location":2,"content":"under grads and you show them pairs of"},{"from":4240.94,"to":4243.28,"location":2,"content":"words and say rate these things for"},{"from":4243.28,"to":4245.71,"location":2,"content":"similarity on a scale of 1 to 10 and"},{"from":4245.71,"to":4248.23,"location":2,"content":"lots of that data has been collected and"},{"from":4248.23,"to":4250.54,"location":2,"content":"you work out the mean over human beings"},{"from":4250.54,"to":4253.51,"location":2,"content":"and they give numbers like this of Tiger"},{"from":4253.51,"to":4257.38,"location":2,"content":"and cat 7.35 Tigers similar to Tiger 10"},{"from":4257.38,"to":4260.56,"location":2,"content":"book and paper plane and car stock and"},{"from":4260.56,"to":4263.56,"location":2,"content":"phone stock and CD and you get numbers"},{"from":4263.56,"to":4266.53,"location":2,"content":"so then what we're doing is wanting to"},{"from":4266.53,"to":4269.38,"location":2,"content":"say well let's use distance in the space"},{"from":4269.38,"to":4272.08,"location":2,"content":"to map directly onto these similarity"},{"from":4272.08,"to":4275.68,"location":2,"content":"judgments and how well does it map and"},{"from":4275.68,"to":4278.44,"location":2,"content":"so that sort of similarity judging has"},{"from":4278.44,"to":4281.56,"location":2,"content":"also then been used for evaluating these"},{"from":4281.56,"to":4283.84,"location":2,"content":"systems so again here are a lot of"},{"from":4283.84,"to":4285.7,"location":2,"content":"models this is again from our 
glove"},{"from":4285.7,"to":4287.95,"location":2,"content":"paper but so there are these various"},{"from":4287.95,"to":4290.41,"location":2,"content":"similarity data sets so one of the best"},{"from":4290.41,"to":4291.91,"location":2,"content":"known ones that I had on the slide"},{"from":4291.91,"to":4296.14,"location":2,"content":"before is this WordSim-353"},{"from":4296.14,"to":4300.64,"location":2,"content":"it has 353 different ones in it and so"},{"from":4300.64,"to":4303.1,"location":2,"content":"you're sort of then modeling a"},{"from":4303.1,"to":4305.56,"location":2,"content":"correlation between your judgments of"},{"from":4305.56,"to":4307.66,"location":2,"content":"similarity and the ones that came from"},{"from":4307.66,"to":4311.2,"location":2,"content":"the human beings ok two more things I"},{"from":4311.2,"to":4314.5,"location":2,"content":"want to say yeah so we had that problem"},{"from":4314.5,"to":4318.55,"location":2,"content":"right at the beginning of Clinton and"},{"from":4318.55,"to":4321.67,"location":2,"content":"how that could be various people and"},{"from":4321.67,"to":4323.86,"location":2,"content":"that's perhaps in some sense the"},{"from":4323.86,"to":4326.59,"location":2,"content":"simplest case of words being ambiguous"},{"from":4326.59,"to":4328.78,"location":2,"content":"when you have names which have reference"},{"from":4328.78,"to":4331.87,"location":2,"content":"to different people but it's not only"},{"from":4331.87,"to":4336.43,"location":2,"content":"true of names so by and large words in"},{"from":4336.43,"to":4340.39,"location":2,"content":"human languages are ambiguous and have lots"},{"from":4340.39,"to":4343.54,"location":2,"content":"of meanings that's especially true of"},{"from":4343.54,"to":4345.82,"location":2,"content":"common words they always have lots of"},{"from":4345.82,"to":4348.52,"location":2,"content":"meaning it's especially true of words"},{"from":4348.52,"to":4350.89,"location":2,"content":"that 
have existed for a long time it's"},{"from":4350.89,"to":4353.23,"location":2,"content":"not true of new very technical words you"},{"from":4353.23,"to":4355.3,"location":2,"content":"know carcinoma I think that only has one"},{"from":4355.3,"to":4357.9,"location":2,"content":"meaning but you know if you think"},{"from":4357.9,"to":4363.35,"location":2,"content":"of any relatively common word and starts"},{"from":4363.35,"to":4366.15,"location":2,"content":"scratching your head for a moment you'll"},{"from":4366.15,"to":4369.12,"location":2,"content":"find it has lots of meanings I maybe"},{"from":4369.12,"to":4370.8,"location":2,"content":"this isn't even such a common word but"},{"from":4370.8,"to":4372.89,"location":2,"content":"my random word I've got here is pike"},{"from":4372.89,"to":4375.51,"location":2,"content":"pike has lots of meanings it has"},{"from":4375.51,"to":4380.34,"location":2,"content":"meanings like a fish it's a kind of fish"},{"from":4380.34,"to":4381.93,"location":2,"content":"yeah so there's a fish that's a pike"},{"from":4381.93,"to":4386.64,"location":2,"content":"what else is a pike a large spear yes a"},{"from":4386.64,"to":4389.04,"location":2,"content":"large spear as a pike other kinds of"},{"from":4389.04,"to":4393.03,"location":2,"content":"Pike's gymnastics move or in diving move"},{"from":4393.03,"to":4397.2,"location":2,"content":"it's a road yeah so there are lots of"},{"from":4397.2,"to":4399.72,"location":2,"content":"meanings there are other meanings"},{"from":4399.72,"to":4401.88,"location":2,"content":"I mean Australian English Pike has also"},{"from":4401.88,"to":4404.61,"location":2,"content":"used as a verb to mean to pull out of"},{"from":4404.61,"to":4407.58,"location":2,"content":"doing something like we were all going"},{"from":4407.58,"to":4410.85,"location":2,"content":"to go out to a nightclub later but Joe"},{"from":4410.85,"to":4414.48,"location":2,"content":"piked I don't think that usage is 
common"},{"from":4414.48,"to":4416.34,"location":2,"content":"in this country but um you can try it"},{"from":4416.34,"to":4421.02,"location":2,"content":"out right but lots of meanings and you"},{"from":4421.02,"to":4423.69,"location":2,"content":"know this isn't only true of the word"},{"from":4423.69,"to":4426.63,"location":2,"content":"Pike you might pick any other simple"},{"from":4426.63,"to":4428.46,"location":2,"content":"word right you can pick a word like"},{"from":4428.46,"to":4432.39,"location":2,"content":"shale or field or house or make you know"},{"from":4432.39,"to":4434.25,"location":2,"content":"they have lots of meanings when it comes"},{"from":4434.25,"to":4436.98,"location":2,"content":"down to it so you know but so how can"},{"from":4436.98,"to":4439.35,"location":2,"content":"this work if we just have one meaning"},{"from":4439.35,"to":4442.44,"location":2,"content":"for words and that's an interesting"},{"from":4442.44,"to":4445.5,"location":2,"content":"question and it was something that we"},{"from":4445.5,"to":4448.4,"location":2,"content":"were actually interested in early on so"},{"from":4448.4,"to":4451.74,"location":2,"content":"um even before the word2vec paper came out"},{"from":4451.74,"to":4456.03,"location":2,"content":"back in 2012 we were playing around with"},{"from":4456.03,"to":4460.56,"location":2,"content":"neural word vectors and we thought boy"},{"from":4460.56,"to":4464.7,"location":2,"content":"this is so broken having only one sense"},{"from":4464.7,"to":4467.88,"location":2,"content":"for a word why don't we come up with the"},{"from":4467.88,"to":4469.74,"location":2,"content":"model that has multiple senses for a"},{"from":4469.74,"to":4472.35,"location":2,"content":"word and so we did that and we did it in"},{"from":4472.35,"to":4475.47,"location":2,"content":"a pretty crude way I guess the way we"},{"from":4475.47,"to":4479.28,"location":2,"content":"did it is say well let's for each 
common"},{"from":4479.28,"to":4482.7,"location":2,"content":"word let's cluster all the context in"},{"from":4482.7,"to":4486.18,"location":2,"content":"which it occurs and then we'll see if"},{"from":4486.18,"to":4489.06,"location":2,"content":"there seem to be multiple clear clusters"},{"from":4489.06,"to":4491.52,"location":2,"content":"by some criterion for that"},{"from":4491.52,"to":4494.76,"location":2,"content":"and if so we'll just sort of split the"},{"from":4494.76,"to":4496.89,"location":2,"content":"word into pseudo words so if it seems"},{"from":4496.89,"to":4500.13,"location":2,"content":"like that there are five clusters for"},{"from":4500.13,"to":4502.08,"location":2,"content":"the word the example I'm it to use here"},{"from":4502.08,"to":4504.96,"location":2,"content":"is Jaguar five clusters for the word JQ"},{"from":4504.96,"to":4507.21,"location":2,"content":"Oh we'll just call them Jaguar 1 j qo 2"},{"from":4507.21,"to":4510.39,"location":2,"content":"ju a 3 4 5 so we just literally change"},{"from":4510.39,"to":4512.94,"location":2,"content":"the word in our corpus according to us"},{"from":4512.94,"to":4515.01,"location":2,"content":"cluster number and then we run our word"},{"from":4515.01,"to":4517.02,"location":2,"content":"vectoring algorithm and so we get a"},{"from":4517.02,"to":4519.9,"location":2,"content":"representation for each of those senses"},{"from":4519.9,"to":4520.83,"location":2,"content":"of the word"},{"from":4520.83,"to":4522.96,"location":2,"content":"and basically that works right up the"},{"from":4522.96,"to":4525.6,"location":2,"content":"top is Jaguar 1 next luxury and"},{"from":4525.6,"to":4530.19,"location":2,"content":"convertible here is I guess there's a"},{"from":4530.19,"to":4532.56,"location":2,"content":"very old version and Mac OS called"},{"from":4532.56,"to":4534.12,"location":2,"content":"Jaguar and you remember and remember"},{"from":4534.12,"to":4535.23,"location":2,"content":"that 
one"},{"from":4535.23,"to":4537.6,"location":2,"content":"right so it's Jaguar is right next to"},{"from":4537.6,"to":4539.55,"location":2,"content":"software and Microsoft up there so"},{"from":4539.55,"to":4542.31,"location":2,"content":"that's hopeful he is the Jaguar that's"},{"from":4542.31,"to":4546.03,"location":2,"content":"right next to the hunter and I'm a bit"},{"from":4546.03,"to":4548.01,"location":2,"content":"confused on this one this Jaguars near"},{"from":4548.01,"to":4550.65,"location":2,"content":"solo musical keyboard and string is"},{"from":4550.65,"to":4552.93,"location":2,"content":"there a band a brand of keyboard called"},{"from":4552.93,"to":4555.03,"location":2,"content":"Jack I'm not quite sure about that one"},{"from":4555.03,"to":4557.78,"location":2,"content":"but anyway it sort of basically works"},{"from":4557.78,"to":4560.82,"location":2,"content":"but that was sort of crude and it's also"},{"from":4560.82,"to":4562.8,"location":2,"content":"perhaps problematic it's a lot of time"},{"from":4562.8,"to":4565.41,"location":2,"content":"the divisions between senses aren't very"},{"from":4565.41,"to":4567.75,"location":2,"content":"clear right a lot of senses are actually"},{"from":4567.75,"to":4569.79,"location":2,"content":"related to each other and overlapping"},{"from":4569.79,"to":4572.01,"location":2,"content":"because when how senses normally arrive"},{"from":4572.01,"to":4574.2,"location":2,"content":"is that people stretch the meanings of"},{"from":4574.2,"to":4575.85,"location":2,"content":"words it's not that they just sort of"},{"from":4575.85,"to":4578.19,"location":2,"content":"randomly wake up the next morning and"},{"from":4578.19,"to":4580.89,"location":2,"content":"say I know carpet I could also refer to"},{"from":4580.89,"to":4584.76,"location":2,"content":"that as stone and give a new sense to"},{"from":4584.76,"to":4586.56,"location":2,"content":"the word stone right you sort of 
take"},{"from":4586.56,"to":4588.96,"location":2,"content":"something that you know about like a web"},{"from":4588.96,"to":4591.18,"location":2,"content":"and you extend it metaphorically to"},{"from":4591.18,"to":4595.17,"location":2,"content":"other uses of webbing so here's a"},{"from":4595.17,"to":4597.3,"location":2,"content":"perhaps more interesting thing so this"},{"from":4597.3,"to":4600.66,"location":2,"content":"is the other Sanjeev Arora paper that I"},{"from":4600.66,"to":4603,"location":2,"content":"was going to mention so that what"},{"from":4603,"to":4607.47,"location":2,"content":"happens if you don't if you don't have"},{"from":4607.47,"to":4610.35,"location":2,"content":"more than one sense for each word well"},{"from":4610.35,"to":4613.23,"location":2,"content":"effectively what you get is that the"},{"from":4613.23,"to":4616.02,"location":2,"content":"word vector that you learn is what's"},{"from":4616.02,"to":4618.63,"location":2,"content":"referred to by physicists and fancy"},{"from":4618.63,"to":4622.41,"location":2,"content":"people as a superposition of the word"},{"from":4622.41,"to":4624.63,"location":2,"content":"vectors of the different sentences"},{"from":4624.63,"to":4627.57,"location":2,"content":"different sensors play super super"},{"from":4627.57,"to":4632.69,"location":2,"content":"position just means a weighted average"},{"from":4632.69,"to":4636.12,"location":2,"content":"so that effectively my meaning of Pyke"},{"from":4636.12,"to":4638.43,"location":2,"content":"is sort of a weighted average of the"},{"from":4638.43,"to":4640.47,"location":2,"content":"vectors for the different senses of Pyke"},{"from":4640.47,"to":4643.32,"location":2,"content":"and the components are just weighted by"},{"from":4643.32,"to":4646.41,"location":2,"content":"their frequency so that part maybe is"},{"from":4646.41,"to":4648.9,"location":2,"content":"perhaps not too surprising but the 
part"},{"from":4648.9,"to":4651.96,"location":2,"content":"that's really surprising is well if"},{"from":4651.96,"to":4653.97,"location":2,"content":"we're just averaging these word vectors"},{"from":4653.97,"to":4657.3,"location":2,"content":"you think you couldn't get anything out"},{"from":4657.3,"to":4659.43,"location":2,"content":"of the average right like if I tell you"},{"from":4659.43,"to":4661.89,"location":2,"content":"I'm thinking of two numbers and they're"},{"from":4661.89,"to":4663.18,"location":2,"content":"you know"},{"from":4663.18,"to":4666.48,"location":2,"content":"weighted sum is 54 what are my two"},{"from":4666.48,"to":4668.52,"location":2,"content":"numbers right you know sort of really"},{"from":4668.52,"to":4670.5,"location":2,"content":"short of information to be able to"},{"from":4670.5,"to":4673.8,"location":2,"content":"answer my question but well you know for"},{"from":4673.8,"to":4678.36,"location":2,"content":"these word vectors we have these high"},{"from":4678.36,"to":4682.41,"location":2,"content":"dimensional spaces and even though there"},{"from":4682.41,"to":4685.92,"location":2,"content":"are a lot of words the space is so vast"},{"from":4685.92,"to":4688.62,"location":2,"content":"for thoughts dimensions that actual"},{"from":4688.62,"to":4692.52,"location":2,"content":"words or sensors are very sparse in that"},{"from":4692.52,"to":4695.25,"location":2,"content":"space and so it turns out there's this"},{"from":4695.25,"to":4698.46,"location":2,"content":"whole literature on sparse coding"},{"from":4698.46,"to":4701.37,"location":2,"content":"compressed sensing some of which is"},{"from":4701.37,"to":4702.6,"location":2,"content":"actually done by people in the stats"},{"from":4702.6,"to":4706.26,"location":2,"content":"department here which shows that in"},{"from":4706.26,"to":4708.21,"location":2,"content":"these cases where you have these sort of"},{"from":4708.21,"to":4711.27,"location":2,"content":"sparse codes in 
these high dimensional"},{"from":4711.27,"to":4713.34,"location":2,"content":"spaces you can actually commonly"},{"from":4713.34,"to":4715.65,"location":2,"content":"reconstruct out the components of a"},{"from":4715.65,"to":4717.87,"location":2,"content":"superposition even though all you've"},{"from":4717.87,"to":4719.46,"location":2,"content":"done has sort of done this weighted"},{"from":4719.46,"to":4722.49,"location":2,"content":"average and so this paper looks at how"},{"from":4722.49,"to":4725.55,"location":2,"content":"you can do this and so they have these"},{"from":4725.55,"to":4728.49,"location":2,"content":"underlying meaning components and they"},{"from":4728.49,"to":4731.61,"location":2,"content":"sort of separated out so tie has one"},{"from":4731.61,"to":4733.92,"location":2,"content":"meaning component as in the space of"},{"from":4733.92,"to":4736.44,"location":2,"content":"trousers blouse waistcoat that makes"},{"from":4736.44,"to":4738.48,"location":2,"content":"sense another one in this meaning"},{"from":4738.48,"to":4740.85,"location":2,"content":"component of season teams winning league"},{"from":4740.85,"to":4744.06,"location":2,"content":"makes sense scoreline goalless"},{"from":4744.06,"to":4746.37,"location":2,"content":"equalizer clinching scoreless this one"},{"from":4746.37,"to":4748.95,"location":2,"content":"seems to overlap with this one a bit but"},{"from":4748.95,"to":4750.06,"location":2,"content":"here tie"},{"from":4750.06,"to":4752.25,"location":2,"content":"this sort of cable ties and wire ties"},{"from":4752.25,"to":4754.26,"location":2,"content":"and things like that so they're actually"},{"from":4754.26,"to":4756.18,"location":2,"content":"able to pull out the different sense"},{"from":4756.18,"to":4757.54,"location":2,"content":"meanings"},{"from":4757.54,"to":4760.09,"location":2,"content":"from outside out of the meaning of the"},{"from":4760.09,"to":4764.02,"location":2,"content":"word so that is a kind of a cool thing 
I"},{"from":4764.02,"to":4769.39,"location":2,"content":"just want to say one more thing okay all"},{"from":4769.39,"to":4772.72,"location":2,"content":"the evaluations so far was intrinsic you"},{"from":4772.72,"to":4774.82,"location":2,"content":"also might want to do extrinsic"},{"from":4774.82,"to":4778.03,"location":2,"content":"evaluation why why word vectors excited"},{"from":4778.03,"to":4780.79,"location":2,"content":"people in NLP so much is it turned out"},{"from":4780.79,"to":4782.95,"location":2,"content":"that having this meaning having this"},{"from":4782.95,"to":4785.23,"location":2,"content":"representation of meaning just turned"},{"from":4785.23,"to":4787.48,"location":2,"content":"out to be very useful and sort of"},{"from":4787.48,"to":4790.48,"location":2,"content":"improve all of your tasks after that and"},{"from":4790.48,"to":4793.87,"location":2,"content":"so this is doing named entity"},{"from":4793.87,"to":4795.94,"location":2,"content":"recognition which is labeling persons"},{"from":4795.94,"to":4798.79,"location":2,"content":"and locations and organizations but you"},{"from":4798.79,"to":4800.68,"location":2,"content":"know it's typical of many tasks of what"},{"from":4800.68,"to":4802.93,"location":2,"content":"people found was if you started with a"},{"from":4802.93,"to":4805.06,"location":2,"content":"model without sort of word"},{"from":4805.06,"to":4807.52,"location":2,"content":"representations and you throw in your"},{"from":4807.52,"to":4810.13,"location":2,"content":"word vectors regardless of whether they're"},{"from":4810.13,"to":4812.53,"location":2,"content":"word2vec or GloVe ones just kind of"},{"from":4812.53,"to":4814.69,"location":2,"content":"your numbers go up a couple of percent"},{"from":4814.69,"to":4817.03,"location":2,"content":"or more and so the word vectors were"},{"from":4817.03,"to":4819.43,"location":2,"content":"just sort of this useful source that 
you"},{"from":4819.43,"to":4821.89,"location":2,"content":"could throw into any NLP system that you"},{"from":4821.89,"to":4824.53,"location":2,"content":"built and your numbers went up so that"},{"from":4824.53,"to":4825.73,"location":2,"content":"there are just a very effective"},{"from":4825.73,"to":4829,"location":2,"content":"technology which actually did work and"},{"from":4829,"to":4831.16,"location":2,"content":"basically any extrinsic tasks you type"},{"from":4831.16,"to":4835.29,"location":2,"content":"tried it on okay thanks a lot"}]}