1883 · Top K Frequently Mentioned Keywords - LintCode
# Description
Given a list of reviews, a list of keywords and an integer k .
Find out the top k keywords that appear most frequently in different comments, and the K keywords are sorted according to the number of times.
The comparison of strings is case-insensitive. If the mentioned times of keywords are the same in different reviews, list the keywords in alphabetical order.
- If K is greater than the length of the list keywords, then output keywords directly
- the length of keywords within range: [1, 100]
- the length of reviews within range: [1, 1000]
- keywords [i] consists of lowercase letters
- reviews [i] consists of uppercase and lowercase letters and punctuation: [ "[", "", "!", "?", ",", ";" , ".", "]", " "]
# Example
Example 1:
Input:
k = 2
keywords = ["anacell", "cetracular", "betacellular"]
reviews = [
"Anacell provides the best services in the city",
"betacellular has awesome services",
"Best services provided by anacell, everyone should use anacell",
]
Output:
["anacell", "betacellular"]
Explanation:
"anacell" is occuring in 2 different reviews and "betacellular" is only occuring in 1 review.
Example 2:
Input:
k = 2
keywords = ["anacell", "betacellular", "cetracular", "deltacellular", "eurocell"]
reviews = [
"I love anacell Best services; Best services provided by anacell",
"betacellular has great services",
"deltacellular provides much better services than betacellular",
"cetracular is worse than anacell",
"Betacellular is better than deltacellular.",
]
Output:
["betacellular", "anacell"]
Explanation:
"betacellular" is occuring in 3 different reviews. "anacell" and "deltacellular" are occuring in 2 reviews, but "anacell" is lexicographically smaller.
# Solution
public class Solution { | |
/** | |
* @param K: a integer | |
* @param keywords: a list of string | |
* @param reviews: a list of string | |
* @return: return the top k keywords list | |
*/ | |
private class WordState { | |
String word; | |
Set<Integer> reviewsCount; | |
public WordState(String word) { | |
this.word = word; | |
this.reviewsCount = new HashSet<>(); | |
} | |
} | |
public List<String> TopkKeywords(int K, String[] keywords, String[] reviews) { | |
List<String> answer = new ArrayList<>(); | |
Set<String> keywordsCol = new HashSet<>(); | |
Map<String, WordState> hashWord = new HashMap<>(); | |
for (int i = 0; i < keywords.length; i++) | |
keywordsCol.add(keywords[i]); | |
for (int i = 0; i < reviews.length; i++) { | |
String review = reviews[i]; | |
review = review.replaceAll("[\\!?,;.]", "").toLowerCase(); | |
review = review.replace("[", ""); | |
review = review.replace("]", ""); | |
String[] words = review.split(" "); | |
for (int j = 0; j < words.length; j++) { | |
String word = words[j]; | |
if (keywordsCol.contains(word)) { | |
WordState wordState; | |
if (hashWord.containsKey(word)) | |
wordState = hashWord.get(word); | |
else | |
wordState = new WordState(word); | |
wordState.reviewsCount.add(i); | |
hashWord.put(word, wordState); | |
} | |
} | |
} | |
PriorityQueue<WordState> priorityQueue = new PriorityQueue<WordState>(new Comparator<WordState>() { | |
@Override | |
public int compare(WordState o1, WordState o2) { | |
if (o1.reviewsCount.size() != o2.reviewsCount.size()) | |
return Integer.compare(o2.reviewsCount.size(), o1.reviewsCount.size()); | |
else | |
return o1.word.compareTo(o2.word); | |
} | |
}); | |
priorityQueue.addAll(hashWord.values()); | |
if (K > priorityQueue.size()){ | |
for (int i = 0; i < keywords.length && !priorityQueue.isEmpty(); i++) | |
answer.add(priorityQueue.poll().word); | |
} | |
else{ | |
for (int i = 0; i < K; i++) { | |
answer.add(priorityQueue.poll().word); | |
} | |
} | |
return answer; | |
} | |
} |