Like many good interview questions, the question is phrased a little ambiguously/imprecisely, to force the interviewee to ask clarifying questions and state assumptions. I think a number of the other answers here are good, as they poke at these assumptions and demonstrate big-picture understanding.
I'm assuming the text is stored 'offline' somewhere, but there is a way to iterate over each word in the text without loading the whole text into memory.
Then the F# code below find the top N words. It's only data structure is a mapping of key-value pairs (word, frequency), and it only keeps the top N of those, so the memory use is O(N), which is small. The runtime is O(numWordsInText^2), which is poor, but acceptable given the problem constraints. The gist of the algorithm is straightforward, for each word in the text, count how many times it occurs, and if it's in the running best-N, then add it to the list and remove the previous minimum entry.
Note that the actual program below loads the entire text into memory, merely for convenience of exposition.
#light
// some boilerplate to grab a big piece of text off the web for testing
open System.IO
open System.Net
let HttpGet (url: string) =
let req = System.Net.WebRequest.Create(url)
let resp = req.GetResponse()
let stream = resp.GetResponseStream()
let reader = new StreamReader(stream)
let data = reader.ReadToEnd()
resp.Close()
data
let text = HttpGet "http://www-static.cc.gatech.edu/classes/cs2360_98_summer/hw1"
let words = text.Split([|' ';'\r';'\n'|], System.StringSplitOptions.RemoveEmptyEntries)
// perhaps 'words' isn't actually stored in memory, but so long as we can
// 'foreach' over all the words in the text we're good
let N = 5 // how many 'top frequency' words we want to find
let FindMin map =
// key-value pair with mininum value in a map
let (Some(seed)) = Map.first (fun k v -> Some(k,v)) map
map |> Map.fold_left
(fun (mk,mv) k v -> if v > mv then (mk,mv) else (k,v))
seed
let Main() =
let mutable freqCounts = Map.of_list [ ("",0) ]
for word in words do
let mutable count = 0
for x in words do
if x = word then
count <- count + 1
let minStr,minCount = FindMin freqCounts
if count >= minCount then
freqCounts <- Map.add word count freqCounts
if Seq.length freqCounts > N then
freqCounts <- Map.remove minStr freqCounts
freqCounts
|> Seq.sort_by (fun (KeyValue(k,v)) -> -v)
|> Seq.iter (printfn "%A")
Main()
Output:
[the, 75]
[to, 41]
[in, 34]
[a, 32]
[of, 29]