MITIE 實體抽取例子

嘗試使用 MITIE 進行實體抽取，先記錄一下，後續改成 Java 版本的。

  # Example: inspect the feature vector MITIE computes for a single word.
import mitie

# Path to the total_word_feature_extractor data file loaded below.
model_file = '/home/test/rasa_nlu_chi/data/total_word_feature_extractor_zh.dat'
extractor = mitie.total_word_feature_extractor(model_file)

# Look up the feature vector of the word "我".
feats = extractor.get_feature_vector("我")

# The label names the word that was actually queried, and the slice is
# limited to five values so the output matches what the label promises.
print("First 5 features of word '我'", feats[:5])

  # Example: train a small MITIE NER model from two hand-labelled sentences,
  # save it to disk, then run it on a new token list.
import os
import sys

from mitie import ner_trainer, ner_training_instance

# First training sentence. Entities are marked by token-index ranges:
# range(5, 6) covers token 5 ("cheap"), range(6, 7) covers token 6 ("Mexican").
sample = ner_training_instance(
    ["I", "am", "looking", "for", "some", "cheap", "Mexican", "food", "."]
)
sample.add_entity(range(5, 6), "pricerange")
sample.add_entity(range(6, 7), "cuisine")

# Second training sentence: token 2 ("indian") and token 6 ("centre").
sample2 = ner_training_instance(
    ["show", "me", "indian", "restaurants", "in", "the", "centre", "."]
)
sample2.add_entity(range(2, 3), "cuisine")
sample2.add_entity(range(6, 7), "area")

# The trainer is constructed from the word-feature-extractor data file.
trainer = ner_trainer(
    "/home/test/rasa_nlu_chi/data/total_word_feature_extractor_zh.dat"
)
trainer.add(sample)
trainer.add(sample2)
trainer.num_threads = 4

# Train the model and persist it.
ner = trainer.train()
ner.save_to_disk("new_ner_model.dat")

# Run the freshly trained model on an unseen sentence.
tokens = ["I", "want", "expensive", "korean", "food"]
entities = ner.extract_entities(tokens)

for entity in entities:
    # entity[0] is the range of token indices, entity[1] the tag.
    # The local is not called `range`, so the builtin stays usable.
    token_range = entity[0]
    tag = entity[1]
    entity_text = " ".join(tokens[i] for i in token_range)
    print(" " + tag + ": " + entity_text)

  # Example: load a previously trained entity extractor and run it on tokens.
# `import mitie` is required here: `from mitie import *` does not bind the
# name `mitie`, so the `mitie.`-qualified calls below would raise NameError.
import mitie

ner = mitie.named_entity_extractor(
    "/home/test/rasa_nlu_chi/models/default/model_20200522-171449/entity_extractor.dat"
)

tokens = ["兩居室", "個人", "三居室"]
print("Tokenized input:", tokens)

# The word feature extractor is loaded separately and passed to
# extract_entities together with the tokens.
model_file = '/home/test/rasa_nlu_chi/data/total_word_feature_extractor_zh.dat'
extractor = mitie.total_word_feature_extractor(model_file)

entities = ner.extract_entities(tokens, extractor)
print("\nEntities found:", entities)
print("\nNumber of entities detected:", len(entities))

for entity in entities:
    # entity[0] is the range of token indices, entity[1] the tag.
    # The local is not called `range`, so the builtin stays usable.
    token_range = entity[0]
    tag = entity[1]
    entity_text = " ".join(tokens[i] for i in token_range)
    print(" " + tag + ": " + entity_text)

相關文章
相關標籤/搜索