w2v/
`-- trunk
    |-- LICENSE
    |-- README.txt
    |-- compute-accuracy.c
    |-- demo-analogy.sh
    |-- demo-classes.sh
    |-- demo-phrase-accuracy.sh
    |-- demo-phrases.sh
    |-- demo-train-big-model-v1.sh
    |-- demo-word-accuracy.sh
    |-- demo-word.sh
    |-- distance.c
    |-- makefile
    |-- questions-phrases.txt
    |-- questions-words.txt
    |-- word-analogy.c
    |-- word2phrase.c
    `-- word2vec.c
# Build rules for the word2vec command-line tools.
# Each program is compiled and linked in a single step from the one .c file
# that shares its name.

CC = gcc
# Using -Ofast instead of -O3 might result in faster code, but is supported only by newer GCC versions
# The linker options (-lm, -pthread) are carried in CFLAGS, so every recipe
# keeps $(CFLAGS) at the end of the command line, after the source file.
CFLAGS = -lm -pthread -O3 -march=native -Wall -funroll-loops -Wno-unused-result

# Executables built by `all` and removed by `clean`.
PROGRAMS = word2vec word2phrase distance word-analogy compute-accuracy

# `all` and `clean` name commands, not files; declaring them phony keeps them
# working even if a file called `all` or `clean` appears in this directory.
.PHONY: all clean

all: $(PROGRAMS)

# In the recipes below, $< is the .c prerequisite and $@ is the program name.
word2vec: word2vec.c
	$(CC) $< -o $@ $(CFLAGS)

word2phrase: word2phrase.c
	$(CC) $< -o $@ $(CFLAGS)

distance: distance.c
	$(CC) $< -o $@ $(CFLAGS)

word-analogy: word-analogy.c
	$(CC) $< -o $@ $(CFLAGS)

# Besides building the program, this rule also marks every *.sh script in the
# current directory as executable.
compute-accuracy: compute-accuracy.c
	$(CC) $< -o $@ $(CFLAGS)
	chmod +x *.sh

# Remove the built executables.
clean:
	rm -rf $(PROGRAMS)
Enter word or sentence (EXIT to break): china

Word: china  Position in vocabulary: 486

                                              Word       Cosine distance
------------------------------------------------------------------------
                                             japan              0.648631
                                            taiwan              0.630534
                                         manchuria              0.599535
                                             tibet              0.583566
                                               prc              0.560898
                                          kalmykia              0.558937
                                            xiamen              0.556037
                                             jiang              0.553501
                                           chinese              0.547065
                                              liao              0.543676
                                             india              0.536273
                                             korea              0.534758
                                               roc              0.530741
                                          thailand              0.529334
                                             hunan              0.527629
                                             liang              0.527374
                                          shanghai              0.526314
                                         chongqing              0.525559
                                           nanjing              0.521342
                                            yunnan              0.518669
                                             wuhan              0.516914
                                              zhao              0.513246
                                          xinjiang              0.509939
                                              tuva              0.507322
                                         guangdong              0.507288
                                             hubei              0.505540
                                           guangxi              0.501068
                                            taipei              0.497673
                                             macao              0.497303
                                            hainan              0.494808
                                          shandong              0.493323
                                          shenzhen              0.491871
                                          hangzhou              0.489323
                                            balhae              0.488846
                                         guangzhou              0.486907
                                            fujian              0.485473
                                          zhejiang              0.485011
                                            harbin              0.483171