coreBugZJ

此 blog 已弃。

PageRank,HUST Monthly 2011.04.09 之 A,1431

PageRank

Time Limit: 10 Sec Memory Limit: 256 MB
Submissions: 168 Solved: 8

Description

Google is famous over the world, much of the reason for their success lies with the effective search result for their user. In the final, support this effective is the technique called PageRank which largely used the huge url link network. A simple example is a hyperlink in Page A linked to Page B is viewed as A give a vote of support to B.

For us, PageRank is a complex technique, to simplify the problem, let’s analysis a simple model first. For every hyperlink, we just give 2 kind of attribute, the tag on the hyperlink, and the target url hyperlink point. So a hyperlink is viewed as a tag vote to an url.

As a user of search engine, each time we search, we will offer an keywork. Now the problem is for each given keywork, check all given hyperlink, if the tag on hyperlink contain this keywork, we count it’s a effective vote, then list the top 10 most voted url. (if 2 urls get the same votes, we sort it with lexicographic order)

 

Input

First line is N, indicate N hyperlinks. (1<=N<=20000)

Next N lines:

Each line contain two string, the first is the tag of hyperlink and the second is the url.

We guarantee that tags just contain lowcase and the length is less than 7, and urls just not contain blank characters(blank spaces, tabs and line break characters), the length of urls is less than 30.

 

Next line is Q, indicate Q times search. (1<=Q<=50000)

Next Q lines:

Each line just contain a string, indicate the keyword offered by user.

 

Output

For each search, list the top10 most voted urls, and a blankspace and the vote number.(if less then 10 urls be voted, just list all). If no url get votes, just output “Sorry, no result satisfied." In a single line.

For the answer of difference search, output a blank line between them.(Never leave a redundant blank line before the end of file)

 

Important hint, huge output, printf is recommended.

 

Sample Input

10
hustacm http://acm.hust.edu.cn
acmicpc http://acm.hust.edu.cn
acm http://acm.hust.edu.cn
hoj http://acm.hust.edu.cn/thx
vjudge http://acm.hust.edu.cn/vt
forum http://acm.hust.edu.cn/forum
hhoj http://acm.hust.edu.cn/vt
isun http://acm.hust.edu.cn/vt
sempr http://acm.hust.edu.cn/thx
gaewah http://acm.hust.edu.cn/forum
6
acm
m
hoj
zy
j
h

Sample Output

http://acm.hust.edu.cn 3
http://acm.hust.edu.cn 3
http://acm.hust.edu.cn/forum 1
http://acm.hust.edu.cn/thx 1
http://acm.hust.edu.cn/thx 1
http://acm.hust.edu.cn/vt 1
Sorry, no result satisfied.
http://acm.hust.edu.cn/vt 2
http://acm.hust.edu.cn/thx 1
http://acm.hust.edu.cn 1
http://acm.hust.edu.cn/forum 1
http://acm.hust.edu.cn/thx 1
http://acm.hust.edu.cn/vt 1

HINT

 

Source

HONG Zehua / HUST Monthly 2011.04.09


繁琐的字符串插入查找,Trie 灵活应用,因为空间问题,用了一级指针,二级指针,链表。

预先开一个字符串buffer,用于存储 tag 和 url,其它保存 url 的地方只保存相应指针。

先根据 url 字典序将输入的 < tag,url > 排序,利用 Trie 统计对于同一个 url 的所有tag的所有子串对此 url 的 vote,同一tag的相同子串不要重复计数,通过在 Trie 中加入mask实现。

Trie 中节点只保存本关键字投票的 Vote 链表的指针,若此节点对某 url 有投票,则 Vote 链表非空,且只保存得票最高的前 10 名。

程序用时 1932 MS。

  1#include <stdio.h>
  2#include <string.h>
  3#include <stdlib.h>
  4
  5
  6#define  N   20009
  7#define  INF 0x3f3f3f3f
  8
  9#define  STR_LEN  35
 10#define  BUF_TM   (N+N)
 11
 12char buf[ BUF_TM ][ STR_LEN ];
 13int  buf_end;
 14
 15#define  buf_init()  buf_end = 0
 16#define  buf_new()   buf[ buf_end++ ]
 17
 18
 19struct __Data
 20{
 21        char *tag, *url;
 22}
;
 23typedef struct __Data Data;
 24
 25int cmpData( const void *a, const void *b ) {
 26        return strcmp( ((Data*)a)->url, ((Data*)b)->url );
 27}

 28Data  data[ N ];
 29
 30
 31#define  VOTE_TM   3200000
 32#define  VOTE_LIM  10
 33
 34struct __Vote
 35{
 36        char *pSz;
 37        int  vote;
 38        struct __Vote *next;
 39}
;
 40typedef struct __Vote Vote;
 41Vote  vote_mem[ VOTE_TM ];
 42int   vote_end;
 43
 44#define vote_init()   vote_end = 0
 45#define vote_bef(a,b) ( ((a)->vote > (b)->vote) || (((a)->vote == (b)->vote)&&(strcmp((a)->pSz,(b)->pSz)<=0)) )
 46
 47Vote* vote_new() {
 48        Vote *= vote_mem + vote_end++;
 49        memset( p, 0sizeof(Vote) );
 50        return p;
 51}

 52
 53
 54#define  TRIE_TC   26
 55#define  TRIE_TM   2000000
 56
 57struct __Trie
 58{
 59        int flag, mask, cnt;
 60        Vote *pVote;
 61        struct __Trie *ch[ TRIE_TC ];
 62}
;
 63typedef struct __Trie Trie;
 64Trie  trie_mem[ TRIE_TM ];
 65int   trie_end;
 66
 67#define trie_init()   trie_end = 0
 68
 69Trie* trie_new() {
 70        Trie *= trie_mem + trie_end++;
 71        memset( p, 0sizeof(Trie) );
 72        return p;
 73}

 74
 75void trie_addcnt( Trie **pRoot, char *p, int mask ) {
 76        for ( ; ; ) {
 77                if ( (*pRoot) == NULL ) {
 78                        (*pRoot) = trie_new();
 79                }

 80                if ( *p ) {
 81                        pRoot = &( (*pRoot)->ch[ (*p) - 'a' ] );
 82                        ++p;
 83                }

 84                else {
 85                        if ( (*pRoot)->mask != mask ) {
 86                                (*pRoot)->mask = mask;
 87                                ++((*pRoot)->cnt);
 88                        }

 89                        return;
 90                }

 91        }

 92}

 93
 94int trie_getcnt( Trie *root, char *p ) {
 95        for ( ; ; ) {
 96                if ( root == NULL ) {
 97                        return 0;
 98                }

 99                if ( *p ) {
100                        root = root->ch[ (*p) - 'a' ];
101                        ++p;
102                }

103                else {
104                        return root->cnt;
105                }

106        }

107        return 0;
108}

109
110void trie_insert( Trie **pRoot, char *p, char *url, int vote, int mask ) {
111        for ( ; ; ) {
112                if ( (*pRoot) == NULL ) {
113                        (*pRoot) = trie_new();
114                }

115                if ( *p ) {
116                        pRoot = &( (*pRoot)->ch[ (*p) - 'a' ] );
117                        ++p;
118                }

119                else {
120                        (*pRoot)->cnt = 0;
121                        if ( (*pRoot)->mask != mask ) {
122                                Vote *pred, **ppVote, *nv;
123                                int cnt;
124
125                                (*pRoot)->flag = 1;
126                                (*pRoot)->mask = mask;
127
128                                ppVote = &((*pRoot)->pVote);
129                                if ( (*ppVote) == NULL ) {
130                                        (*ppVote) = vote_new();
131                                        (*ppVote)->vote = INF;
132                                }

133
134                                nv = vote_new();
135                                nv->pSz = url;
136                                nv->vote = vote;
137
138                                pred = (*ppVote);
139                                ppVote = &(pred->next);
140                                while ( (*ppVote) != NULL ) {
141                                        if ( vote_bef(pred,nv) && vote_bef(nv,(*ppVote)) ) {
142                                                if ( strcmp( nv->pSz, (*ppVote)->pSz ) != 0 ) {
143                                                        nv->next = pred->next;
144                                                        pred->next = nv;
145                                                }

146                                                break;
147                                        }

148                                        pred = (*ppVote);
149                                        ppVote = &(pred->next);
150                                }

151
152                                if ( (*ppVote) == NULL ) {
153                                        (*ppVote) = nv;
154                                }

155
156                                cnt = 1;
157                                for ( nv = (*pRoot)->pVote->next; (nv!=NULL)&&(cnt<VOTE_LIM); nv=nv->next ) {
158                                        ++cnt;
159                                }

160                                if ( nv != NULL ) {
161                                        nv->next = NULL;
162                                }

163                        }

164                        return;
165                }

166        }

167}

168
169Trie* trie_find( Trie *root, char *p ) {
170        for ( ; ; ) {
171                if ( root == NULL ) {
172                        return NULL;
173                }

174                if ( *p ) {
175                        root = root->ch[ (*p) - 'a' ];
176                        ++p;
177                }

178                else {
179                        return root;
180                }

181        }

182}

183
184void solve( Trie *root, char *pSz ) {
185        Trie *= trie_find( root, pSz );
186        Vote *i;
187        if ( (p==NULL) || (!p->flag) ) {
188                printf( "Sorry, no result satisfied.\n" );
189                return;
190        }

191        for ( i = p->pVote->next; i!=NULL; i=i->next ) {
192                printf( "%s %d\n", i->pSz, i->vote );
193        }

194}

195
196int main() {
197        char  tmp[ STR_LEN ];
198        int n, mask = 10, i, j, k, x, y, vote;
199        Trie *root = NULL;
200
201        vote_init();
202        trie_init();
203        buf_init();
204        scanf( "%d"&n );
205        for ( i = 0; i < n; ++i ) {
206                data[ i ].tag = buf_new();
207                data[ i ].url = buf_new();
208                scanf( "%s%s", data[ i ].tag, data[ i ].url );
209        }

210        qsort( data, n, sizeof(data[0]), cmpData );
211
212        i = j = 0;
213        do {
214                while ( (j<n) && (strcmp(data[j].url,data[i].url)==0) ) {
215                        ++j;
216                }

217
218                for ( k = i; k < j; ++k ) {
219                        ++mask;
220                        for ( x = 0; data[k].tag[x]; ++x ) {
221                                for ( y = x; data[k].tag[y]; ++y ) {
222                                        tmp[ y-x ] = data[k].tag[y];
223                                        tmp[ y-x+1 ] = 0;
224                                        trie_addcnt( &root, tmp, mask );
225                                }

226                        }

227                }

228
229                for ( k = i; k < j; ++k ) {
230                        ++mask;
231                        for ( x = 0; data[k].tag[x]; ++x ) {
232                                for ( y = x; data[k].tag[y]; ++y ) {
233                                        tmp[ y-x ] = data[k].tag[y];
234                                        tmp[ y-x+1 ] = 0;
235                                        vote = trie_getcnt( root, tmp );
236                                        if ( vote > 0 ) {
237                                                trie_insert( &root, tmp, data[i].url, vote, mask );
238                                        }

239                                }

240                        }

241                }

242
243                i = j;
244        }
 while ( j < n );
245
246        scanf( "%d"&n );
247        while ( n-- > 0 ) {
248                scanf( "%s", tmp );
249                solve( root, tmp );
250                if ( n > 0 ) {
251                        printf( "\n" );
252                }

253        }

254        return 0;
255}

256

posted on 2011-04-10 21:23 coreBugZJ 阅读(241) 评论(0)  编辑 收藏 引用 所属分类: ACM


只有注册用户登录后才能发表评论。
【推荐】超50万行VC++源码: 大型组态工控、电力仿真CAD与GIS源码库
网站导航: 博客园   IT新闻   BlogJava   知识库   博问   管理