Many researchers are faced with an ever increasing number of journal articles to read and find it difficult to locate papers of relevance to their particular lines of research. However, it is possible to subscribe to various services which claim that they will find articles that fit an `interest profile' that you supply, and pass them on to you. One simple way of performing such a search is to determine whether a pair of keywords occurs `sufficiently' close to each other in the title of an article. The threshold is determined by the researchers themselves, and refers to the number of words that may occur between the pair of keywords. Thus an archeologist interested in cave paintings could specify her profile as ``0 rock art'', meaning that she wants all titles in which the words ``rock'' and ``art'' appear with 0 words in between, that is next to each other. This would select not only ``Rock Art of the Maori'' but also ``Pop Art, Rock, and the Art of Hang-glider Maintenance''.
許多研究人員都面臨這樣一個問題:需要閱讀的期刊文章數量與日俱增,要找到與自己特定研究方向相關的文章困難重重。然而,有一些訂閱服務聲稱能夠按照你提供的「興趣配置」找到匹配的文章,並傳送給你。一種簡單的搜索方式是:確定文章標題中是否有一對關鍵詞出現得「足夠」靠近。閾值由研究人員自己設定,指的是這對關鍵詞之間最多可以出現的單詞數量。例如一個考古學家對巖洞壁畫感興趣,就會指定她的興趣配置爲「0 rock art」,意思是她希望得到標題中出現「rock」和「art」且中間間隔 0 個單詞(即這兩個單詞彼此相鄰)的所有文章。這樣的興趣配置選出的標題不僅包括「Rock Art of the Maori」,還包括「Pop Art, Rock, and the Art of Hang-glider Maintenance」。

Write a program that will read in a series of profiles followed by a series of titles and determine which of the titles (if any) are selected by each of the profiles. A title is selected by a profile if at least one pair of keywords from the profile is found in the title, separated by no more than the given threshold. For the purposes of this program, a word is a sequence of letters, preceded by one or more blanks and terminated by a blank or the end of line marker.



Input will consist of no more than 50 profiles followed by no more than 250 titles. Each profile and title will be numbered in the order of their appearance, starting from 1, although the numbers will not appear in the file.


Each profile will start with the characters ``P:'', and will consist of a number representing a threshold, followed by two or more keywords in lower case.


Each title will start with the characters ``T:'', and will consist of a string of characters terminated by ``|''. The character ``|'' will not occur anywhere in a title except at the end. No title will be longer than 255 characters, and if necessary it will flow on to more than one line. No line will be longer than eighty characters and each continuation line of a title will start with at least one blank. Line breaks will only occur between words.


All non-alphabetic characters are to be ignored, thus the title ``Don't Rock -- the Boat as Metaphor in 1984'' would be treated as ``Dont Rock the Boat as Metaphor in'' and ``HP2100X'' will be treated as ``HPX''. The file will be terminated by a line consisting of a single #.
所有非字母的字符都應忽略,例如標題「Don't Rock -- the Boat as Metaphor in 1984」應被看成「Dont Rock the Boat as Metaphor in」處理,「HP2100X」將被看成「HPX」處理。輸入文件以只有一個 # 的一行作爲結束。



Output will consist of a series of lines, one for each profile in the input. Each line will consist of the profile number (the number of its appearance in the input) followed by ``:'', a blank space, and the numbers of the selected titles in numerical order, separated by commas and with no spaces.


  1. 所有非字母的字符都忽略;
  2. 僅以空格或換行作爲單詞的分隔符;
  3. 單詞均以小寫形式處理;
  4. 配置中的任意兩個單詞都要算作一對。









#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Short names for the unsigned integer types used in this file:
// ulong is the return type of MakeWordPair, ushort holds a single keyword id.
typedef unsigned long ulong;
typedef unsigned short ushort;

// One parsed profile: its threshold and its keywords converted to a sequence of
// numeric keyword ids. Kept as a plain aggregate (no constructors).
struct PROFILE
{
	size_t nThreshold;           // threshold number read from the profile line
	std::vector<ushort> nArray;  // keyword ids, in the order the keywords appear
};

// A (distance, index) record used on both sides of the matching step:
//  - for a profile: nDist is the profile's threshold and nIdx the profile's index;
//  - for a title:   nDist is the distance between two keywords in the title and
//                   nIdx the title's index.
// Kept as a plain aggregate so that `INFO x = {dist, idx};` keeps working.
struct INFO
{
	size_t nDist;
	size_t nIdx;
};

// Container aliases used by main().
typedef std::vector<std::string> VECSTR;                 // list of raw profile / title strings
typedef std::vector<ushort> ARRAY;                       // sequence of keyword ids
typedef std::vector<ARRAY> MATRIX;                       // one ARRAY per title
typedef std::map<ulong, std::vector<INFO> > MAPINFO;     // MakeWordPair key -> INFO records for that keyword pair
typedef std::pair<size_t, size_t> PAIR;                  // (profile number, title number) result entry

// Packs two keyword ids into a single order-independent key stored in an
// unsigned long: the larger id occupies the low 16 bits and the smaller id the
// next 16 bits, so MakeWordPair(a, b) == MakeWordPair(b, a).
// The ids are widened to unsigned long before shifting; shifting the promoted
// int directly overflows for ids of 0x8000 and above.
ulong MakeWordPair(ushort w1, ushort w2)
{
	const ulong larger = (w1 > w2) ? w1 : w2;
	const ulong smaller = (w1 > w2) ? w2 : w1;
	return larger | (smaller << 16);
}

// Ordering used when sorting INFO records: ascending by nDist, ties broken by
// ascending nIdx.
bool operator < (const INFO &f1, const INFO &f2)
{
	return (f1.nDist < f2.nDist || (f1.nDist == f2.nDist && f1.nIdx < f2.nIdx));
}

// Equality used when removing duplicate INFO records: both fields must match.
bool operator == (const INFO &f1, const INFO &f2)
{
	return (f1.nDist == f2.nDist && f1.nIdx == f2.nIdx);
}

int main(void)
	VECSTR profileStrs, titleStrs;
	for (std::string str; getline(std::cin, str) && str[0] != '#'; ) {
		// 讀入數據,若以「P:」開頭,則表示profile,若以「T:」開頭,則表示title,若以空格或者tab開頭,則承接上一個title。
		switch(str[0]) {
		case 'P':
			profileStrs.push_back(std::string(str.begin() + 2, str.end()));
		case 'T':
			titleStrs.push_back(std::string(str.begin() + 2, str.end()));
		case ' ':
		case '\t':
			titleStrs.back() += str;
	std::map<std::string, ushort> wordTbl;	     // 用於給每個keywords編號,keywords與編號的映射關係存入wordTbl中
	std::vector<PROFILE> arrProfile;	         // 將每一個profile中的keywords序列轉化爲相應的keywords編號序列
	for (VECSTR::iterator i = profileStrs.begin(); i != profileStrs.end(); ++i) {
		i->push_back(' ');
		std::string::iterator iBeg = i->begin();
		// 因爲profile由閾值和keywords串組成,遍歷profile字符串,找到閾值的起始位置
		for (; iBeg != i->end() && !isdigit(*iBeg); ++iBeg);
		// 找到閾值的結束位置,讀取閾值
		std::string strThre;
		std::string::iterator iEnd = iBeg;
		for (; iEnd != i->end() && isdigit(*iEnd); ++iEnd)
		// 保存每個profile的閾值和由keywords的編號組成的序列
		PROFILE &cur = arrProfile.back();
		// 將閾值由文本形式轉爲數值形式
		cur.nThreshold = atoi(strThre.c_str()); 
		std::string word;   
		for (std::string::iterator j = iEnd; j != i->end(); ++j) {
			if (*j != ' ' && *j != '\t')
			else if (!word.empty()) {
				// 更新keywords與編號的映射表
				ushort &wordIdx = wordTbl[word];
				if (wordIdx == 0)
					wordIdx = wordTbl.size();
				// 存儲keywords編號序列
	// 原輸入爲一個profile對應一組keywords pair,將其轉變爲一個keywords pair對應一個profile編號組,創建映射關係
	MAPINFO profileTbl;
	for (std::vector<PROFILE>::iterator i = arrProfile.begin(); i != arrProfile.end(); ++i)	{
		// 全部的keywords兩兩組合做爲一個keywords pair
		for (ARRAY::iterator j = i->nArray.begin(); j != i->nArray.end() - 1; ++j) {
			for (ARRAY::iterator k = j + 1; k != i->nArray.end(); ++k) {
				INFO info = {i->nThreshold, i - arrProfile.begin()};
				profileTbl[MakeWordPair(*j, *k)].push_back(info);

	MATRIX titleAry;
	for (VECSTR::iterator i = titleStrs.begin(); i != titleStrs.end(); ++i) {
		(*i)[i->size() - 1] = ' ';
		std::string word;
		// 按題中要求處理title,去掉非字母的符號。再將title序列轉化爲編號序列,若某一個單詞爲keyword,則標記爲相應的編號,若不是,則標記爲-1
		for (std::string::iterator j = i->begin(); j != i->end(); ++j) {
			char cTmp = tolower(*j);
			if (cTmp != ' ' && cTmp != '\t') {
				if (isalpha(cTmp))
			else if (!word.empty()) {
				std::map<std::string, ushort>::iterator idx = wordTbl.find(word);
				titleAry.back().push_back(idx != wordTbl.end() ? idx->second : -1);
	// 每個title中包含多個keywords pair,計算並存儲每對keywords的距離
	MAPINFO titleTbl;
	for (MATRIX::iterator i = titleAry.begin(); i != titleAry.end(); ++i) {
		// 對當前title創建keywords pair,每對keywords的距離以及title編號的映射表
		std::map<ulong, ushort> curWordmap;
		for (ARRAY::iterator j = i->begin(); j != i->end() - 1; ++j) {
			if (*j != ushort(-1)) {
				for (ARRAY::iterator k = j + 1; k != i->end(); ++k) {
					if (*k != ushort(-1)) {
						// 若存在關鍵字對,則計算兩個關鍵字間的距離,保留最小值
						ushort nDist = k - j;
						ushort &nWord = curWordmap[MakeWordPair(*j, *k)];
						if (nWord == 0 || nDist < nWord)
							nWord = nDist;
		// 將title處理爲一個keywords pair對應一組title編號和距離
		for (std::map<ulong, ushort>::iterator j = curWordmap.begin(); j != curWordmap.end(); ++j) {
			INFO info = {j->second, i - titleAry.begin()};
	// 比較profile和title,肯定哪些title屬於相應的profile
	std::vector<PAIR> result;
	for (MAPINFO::iterator i = profileTbl.begin(); i != profileTbl.end(); ++i) {
		std::vector<INFO> &curP = i->second;
		std::vector<INFO> &curT = titleTbl[i->first];
		// 判斷title中是否有該keywords pair
		if (!curT.empty()) {
			// 當profile和title包含相同的keywords時,將當前的profile編號排序去重
			std::sort(curP.begin(), curP.end());
			curP.erase(std::unique(curP.begin(), curP.end()), curP.end());
			std::sort(curT.begin(), curT.end());    // 將當前的title編號排序
			for (std::vector<INFO>::iterator icurP = curP.begin(), icurT = curT.begin(); 
				icurP != curP.end() && icurT != curT.end();) {
					// 若當前title中關鍵字的距離小於當前profile中閾值,則該title的編號一定屬於當前以後的全部profile(包含當前profile)
					// 若大於當前閾值,則去下一個profile的閾值
				if (icurT->nDist - 1 <= icurP->nDist) {
					for (std::vector<INFO>::iterator j = icurP; j != curP.end(); ++j)
						result.push_back(std::make_pair(j->nIdx + 1, icurT->nIdx + 1));
			result.push_back(std::make_pair(curP.front().nIdx + 1, 0));
	// 對結果排序並輸出
	std::sort(result.begin(), result.end());
	int nProfIdx = 0;
	for (std::vector<PAIR>::iterator i = result.begin(); i != result.end(); ++i) {
		if (i->first != nProfIdx) {
			nProfIdx = i->first;
			if (i != result.begin())
				std::cout << std::endl;
			std::cout << nProfIdx << ": ";
			if (i->second != 0)
				std::cout << i->second;
		else if (i->second != 0)
				std::cout << ',' << i->second;
	std::cout << std::endl;
	return 0;