cd MedAIR # move into the MedAIR directory
python setup.py develop --user # development-mode install for the current user; dependencies are not installed for you, so find and install them yourself
cd scripts # the build scripts below are run from here
python build_db.py
python build_word_freq.py
python build_alignment_matrix.py # default embedding is bio-wv; pass --embedding to use a different one
python build_elastic_search.py # requires Elasticsearch to be running in the background
cd MedAIR # move into the MedAIR directory; run this from the directory that contains MedAIR (not from MedAIR/scripts, where the build steps end)
cd alignment/retriever
python alignment_ranker.py # currently runs every embedding: either run build_alignment_matrix.py for each embedding first, or change the code to run only the embedding you want

| Embedding | download link |
| --- | --- |
| bio-wv | https://ftp.ncbi.nlm.nih.gov/pub/lu/Suppl/BioSentVec/BioWordVec_PubMed_MIMICIII_d200.vec.bin |
| glove | http://nlp.stanford.edu/data/glove.840B.300d.zip |
| pubmed-pmc-wv | http://evexdb.org/pmresources/vec-space-models/ |
| wiki-pubmed-pmc-wv | http://evexdb.org/pmresources/vec-space-models/ |

| Embedding | # docs | # question tokens | # embedded question tokens | % question tokens embedded |
| --- | --- | --- | --- | --- |
| bio-wv | 231581 | 32431 | 28816 | 88.9 |
| glove | 231581 | 32431 | 25124 | 77.5 |
| pubmed-pmc-wv | 231581 | 32431 | 26459 | 81.6 |
| wiki-pubmed-pmc-wv | 231581 | 32431 | 26745 | 82.5 |

es top 1000
top 30

| | bio-wv | pubmed-pmc-wv | wiki-pubmed-pmc-wv | glove |
| --- | --- | --- | --- | --- |
| dev | 25.4 | 25.9 | 27.2 | 27.6 |
| test | 26.4 | 26.6 | 26.1 | 25.7 |
| train | 25.7 | 25.4 | 25.1 | 25.3 |

es top 10_000
top 30

| | bio-wv | pubmed-pmc-wv | wiki-pubmed-pmc-wv | glove |
| --- | --- | --- | --- | --- |
| dev | 25.6 | 25.5 | 24.8 | 28.3 |
| test | 26.8 | 27.1 | 28.3 | 25.7 |
| train | 25.8 | 25.8 | 25.7 | 25.1 |

es top 1000
top 100

| | bio-wv | pubmed-pmc-wv | wiki-pubmed-pmc-wv | glove |
| --- | --- | --- | --- | --- |
| dev | 25.6 | 26.2 | 27.1 | 28.5 |
| test | 25.7 | 26.1 | 25.6 | 25.3 |
| train | 25.6 | 25.3 | 25.1 | 25.2 |

es top 1000
top 30 nltk tokenized

| | bio-wv | pubmed-pmc-wv | wiki-pubmed-pmc-wv | glove |
| --- | --- | --- | --- | --- |
| dev | 25.7 | 27.2 | 27.8 | 26.5 |
| test | 26.5 | 25.7 | 26.2 | 25.0 |
| train | 26.2 | 25.6 | 25.6 | 25.7 |

| topn | 100 | 40 | 30 | 20 | 10 | 100 | 30 |
| --- | --- | --- | --- | --- | --- | --- | --- |
| nltk tokenize | False | False | False | False | False | True | True |
| dev | 31.1 | 31.5 | 32.1 | 32.2 | 32.9 | 30.4 | 31.9 |
| test | 33.5 | 32.6 | 33.5 | 32.7 | 32.1 | 32.3 | 33.2 |
| train | 30.9 | 31.8 | 32.1 | 31.5 | 31.4 | 30.3 | 31.5 |