



Hi, I'm developing a very basic web search engine that consists of several parts. After retrieving the results for a user query, I want to calculate a rating for each result and then sort the results by that rating. Here is my query:

// Builds the base query: every URL joined to a document, together with the text of
// the words recorded as hits for that document. There is no `where` clause here, so
// no row is filtered out at this stage.
// NOTE(review): presumably execution is deferred until the query is enumerated —
// confirm against the type of _context.
var tmpQuery = (from urls in _context.Urls
                join documents in _context.Documents
                  // NOTE(review): joins UrlId to DocumentId — presumably a document shares
                  // its key with its URL; verify against the schema.
                  on urls.UrlId equals documents.DocumentId
                // Per-document subquery: Text of each word that has a hit for this document.
                // NOTE(review): the inner range variable `words` has the same name as the
                // `let` variable it initializes; a distinct name would be easier to read.
                let words = (from words in _context.Words
                             join hits in _context.Hits
                               on words.WordId equals hits.WordId
                             where hits.DocumentId == documents.DocumentId
                             select words.Text)
                select new { urls, documents, words });

// Filters the joined rows, projects them to SearchResult, rates each one, and returns
// them sorted by rating (highest first) with duplicates removed.
//
// AsEnumerable() switches to LINQ to Objects, so the `where` and `select` below are
// evaluated in memory, row by row, instead of being translated by the query provider.
// The three-argument Contains(...) calls are a project-defined extension (not the BCL
// Contains); presumably they test whether the field matches the broken-up query for
// the given keyword part — verify against its definition.
var results = (from r in tmpQuery.AsEnumerable()
               where r.urls.ResolvedPath.Contains(breakedQuery, KeywordParts.Url, part) ||
                     r.documents.Title.Contains(breakedQuery, KeywordParts.Title, part) ||
                     r.documents.Keywords.Contains(breakedQuery, KeywordParts.Keywords, part) ||
                     // Qualified with KeywordParts like the sibling arguments.
                     r.documents.Description.Contains(breakedQuery, KeywordParts.Description, part) ||
                     r.words.Contains(breakedQuery, KeywordParts.Content, part)
               select new SearchResult()
               {
                   UrlId = r.urls.UrlId,
                   Url = r.urls.ResolvedPath,
                   IndexedOn = r.documents.IndexedOn,
                   Title = r.documents.Title,
                   Description = r.documents.Description,
                   // new Uri(string) throws UriFormatException when ResolvedPath is not
                   // a valid absolute URI.
                   Host = new Uri(r.urls.ResolvedPath).Host,
                   Length = r.documents.Length,
                   Rate = CalculateRating(breakedQuery, r.urls.ResolvedPath, r.documents.Title, r.documents.Keywords, r.documents.Description, r.words)
               })
              .OrderByDescending(result => result.Rate)
              // NOTE(review): Distinct is documented as returning an unordered sequence;
              // the LINQ to Objects implementation yields first occurrences in source
              // order, which is what keeps the ranking intact here.
              .Distinct(new SearchResultEqualityComparer());

and rate is calculated by this method:

// Computes the rating of one search result for the broken-up user query.
//
// breakedQuery : the individual terms of the raw query, e.g. "Microsoft -Apple"
//                becomes ("Microsoft", "-Apple"). A term that starts with '-' is an
//                exclusion: its score is subtracted instead of added.
// resolvedPath, title, keywords, description : text fields of the result; each is
//                scored through Calculate(string, string).
// words        : the content words of the document; scored through
//                Calculate(IEnumerable<string>, string).
// Returns the sum over all terms of
//   path * 100 + title * 50 + keywords * 25 + description * 10 + words * 1,
// negated for exclusion terms.
private int CalculateRating(IEnumerable<string> breakedQuery, string resolvedPath, string title, string keywords, string description, IEnumerable<string> words)
{
    // Materialize the word sequence once. It may be a deferred query, and it is
    // scored once per term below; without this it would be re-evaluated every time.
    var wordList = words == null ? null : words.ToList();

    var baseRate = 0;

    foreach (var query in breakedQuery)
    {
        if (string.IsNullOrEmpty(query))
        {
            continue;
        }

        // Only a LEADING '-' marks an exclusion. Hyphens inside the term
        // ("e-mail") are part of the term and must be kept.
        var none = query[0] == '-';
        var term = none ? query.Substring(1) : query;

        if (term.Length == 0)
        {
            // A bare "-" carries no term to score.
            continue;
        }

        var pathCount = Calculate(resolvedPath, term);
        var titleCount = Calculate(title, term);
        var keywordsCount = Calculate(keywords, term);
        var descriptionCount = Calculate(description, term);
        var wordsCount = Calculate(wordList, term);

        var result = (pathCount * 100) + (titleCount * 50) + (keywordsCount * 25) + (descriptionCount * 10) + wordsCount;

        if (none)
        {
            baseRate -= result;
        }
        else
        {
            baseRate += result;
        }
    }

    return baseRate;
}

    // Scores a single text field: splits <source> on the space character and counts
    // how often <query> occurs among the resulting tokens, using the sequence
    // overload of Calculate. A null, empty, or whitespace-only source scores 0.
    private int Calculate(string source, string query)
    {
        if (string.IsNullOrWhiteSpace(source))
        {
            return 0;
        }

        // Typed as IEnumerable<string> so the call binds to the sequence overload.
        IEnumerable<string> tokens = source.Split(' ');
        return Calculate(tokens, query);
    }

    // Counts the occurrences of <query> in <sources>.
    //
    // An exact (case-sensitive) match counts as one point. A match that differs only
    // in letter case counts as half a point; the halves are summed first and then
    // integer-divided by two, so a single case-insensitive match contributes 0.
    // A null or empty sequence scores 0.
    //
    // The sequence is walked exactly once, so a deferred query passed in here is
    // evaluated a single time. Every element is counted individually: repeated
    // case-insensitive matches are NOT collapsed into one (a set-based Except would
    // do that and undercount).
    private int Calculate(IEnumerable<string> sources, string query)
    {
        if (sources == null)
        {
            return 0;
        }

        var exactMatches = 0;
        var caseInsensitiveMatches = 0;

        foreach (var source in sources)
        {
            if (source == query)
            {
                exactMatches++;
            }
            else if (string.Equals(source, query, StringComparison.OrdinalIgnoreCase))
            {
                // Null-safe and allocation-free, unlike lower-casing both sides.
                caseInsensitiveMatches++;
            }
        }

        return exactMatches + (caseInsensitiveMatches / 2);
    }

Please advise me on how to improve performance (my search engine is currently very slow).

+1  A: 

I expect this is down to your from urls in _context.Urls - with no Where on this you're getting a lot of data to then throw away when building up your results. How many items are in tmpQuery / results?

Will A
Yes, the real filtering is actually performed in the second query. Please look here to see why I'm using this signature.
For test purposes I'm running over 1176 pages, 57283 URLs, 35733 words and 330621 hits (the relation between a word and a document is saved here).
I expect you'd be much better off trying to do as much of this as you can in a stored procedure.
Will A