1883 · Top K Frequently Mentioned Keywords - LintCode

# Description

Given a list of reviews, a list of keywords and an integer k .
Find out the top k keywords that appear most frequently in different comments, and the K keywords are sorted according to the number of times.
The comparison of strings is case-insensitive. If the mentioned times of keywords are the same in different reviews, list the keywords in alphabetical order.

  • If K is greater than the length of the list keywords, then output keywords directly
  • the length of keywords within range: [1, 100]
  • the length of reviews within range: [1, 1000]
  • keywords [i] consists of lowercase letters
  • reviews [i] consists of uppercase and lowercase letters and punctuation: [ "[", "", "!", "?", ",", ";" , ".", "]", " "]

# Example

Example 1:
Input:
k = 2
keywords = ["anacell", "cetracular", "betacellular"]
reviews = [
  "Anacell provides the best services in the city",
  "betacellular has awesome services",
  "Best services provided by anacell, everyone should use anacell",
]
Output:
["anacell", "betacellular"]
Explanation:
"anacell" is occuring in 2 different reviews and "betacellular" is only occuring in 1 review.
Example 2:
Input:
k = 2
keywords = ["anacell", "betacellular", "cetracular", "deltacellular", "eurocell"]
reviews = [
  "I love anacell Best services; Best services provided by anacell",
  "betacellular has great services",
  "deltacellular provides much better services than betacellular",
  "cetracular is worse than anacell",
  "Betacellular is better than deltacellular.",
]
Output:
["betacellular", "anacell"]
Explanation:
"betacellular" is occuring in 3 different reviews. "anacell" and "deltacellular" are occuring in 2 reviews, but "anacell" is lexicographically smaller.

# Solution

public class Solution {
    /**
     * @param K: a integer
     * @param keywords: a list of string
     * @param reviews: a list of string
     * @return: return the top k keywords list
     */
    private class WordState {
        String word;
        Set<Integer> reviewsCount;
        public WordState(String word) {
            this.word = word;
            this.reviewsCount = new HashSet<>();
        }
    }
    
    
    public List<String> TopkKeywords(int K, String[] keywords, String[] reviews) {
        
        List<String> answer = new ArrayList<>();
        Set<String> keywordsCol = new HashSet<>();
        Map<String, WordState> hashWord = new HashMap<>(); 
        
        for (int i = 0; i < keywords.length; i++)
            keywordsCol.add(keywords[i]);
        for (int i = 0; i < reviews.length; i++) {
            String review = reviews[i];
            
            review = review.replaceAll("[\\!?,;.]", "").toLowerCase();
            review = review.replace("[", "");
            review = review.replace("]", "");
            String[] words = review.split(" ");
            
            for (int j = 0; j < words.length; j++) {
                String word = words[j];
                if (keywordsCol.contains(word)) {
                    WordState wordState;
                    if (hashWord.containsKey(word))
                        wordState = hashWord.get(word);
                    else
                        wordState = new WordState(word);
                    wordState.reviewsCount.add(i);
                    hashWord.put(word, wordState);
                }
            }
        }
        PriorityQueue<WordState> priorityQueue = new PriorityQueue<WordState>(new Comparator<WordState>() {
            @Override
            public int compare(WordState o1, WordState o2) {
                if (o1.reviewsCount.size() != o2.reviewsCount.size())
                    return Integer.compare(o2.reviewsCount.size(), o1.reviewsCount.size());
                else
                    return o1.word.compareTo(o2.word);
            }
        });
        priorityQueue.addAll(hashWord.values());
        if (K > priorityQueue.size()){
            for (int i = 0; i < keywords.length && !priorityQueue.isEmpty(); i++)
                answer.add(priorityQueue.poll().word);
        }
        else{
            for (int i = 0; i < K; i++) {
                answer.add(priorityQueue.poll().word);
            }
        }
        return answer;
    }
}