This post re-implements the "did you mean..?" spell checker from an earlier post using the new Stream type in Java 8.
import java.io.*; import java.nio.charset.StandardCharsets; import java.util.*; import java.util.function.BiFunction; import java.util.function.Consumer; import java.util.function.Function; import java.util.function.IntFunction; import java.util.regex.Pattern; import java.util.stream.IntStream; import java.util.stream.Stream; import static java.util.stream.Stream.concat; public class DidYouMean { private static final Map<String, Integer> DICTIONARY = loadDictionary(); private static Stream<String> edit1(String term) { Stream<String> delete = IntStream.range(0, term.length()) .mapToObj(n -> term.substring(0, n) + term.substring(n + 1)); Stream<String> transpose = IntStream.range(0, term.length() - 1) .mapToObj(n -> term.substring(0, n) + term.charAt(n + 1) + term.charAt(n) + term.substring(n + 2)); Stream<String> replace = IntStream.range(0, term.length()) .boxed() .flatMap(n -> IntStream.rangeClosed('a', 'z').mapToObj(c -> term.substring(0, n) + (char) c + term.substring(n + 1))); Stream<String> insert = IntStream.rangeClosed(0, term.length()) .boxed() .flatMap(n -> IntStream.rangeClosed('a', 'z').mapToObj(c -> term.substring(0, n) + (char) c + term.substring(n))); return concat(concat(concat(delete, transpose), replace), insert); } private static Stream<String> edit2(String term) { return edit1(term).flatMap(DidYouMean::edit1); } public static void main(String... args) { String result = didYouMean(args[0]); if (result == null) { System.out.println("Gibberish!"); } else { System.out.println("Did you mean '" + result + "'?"); } } public static String didYouMean(String term) { String lower = term.toLowerCase(Locale.ENGLISH); if (DICTIONARY.containsKey(lower)) return lower; Comparator<String> bestMatch = (String s1, String s2) -> DICTIONARY.getOrDefault(s1, 0).compareTo(DICTIONARY.getOrDefault(s2, 0)); String result = edit1(lower).max(bestMatch).get(); if (DICTIONARY.containsKey(result)) return result; result = edit2(term).max(bestMatch).get(); return DICTIONARY.containsKey(result) ? result : null; } private static Map<String, Integer> loadDictionary() { try (InputStream data = DidYouMean.class.getResourceAsStream("/demo/big.txt"); Reader reader = new InputStreamReader(data, StandardCharsets.US_ASCII); BufferedReader buffer = new BufferedReader(reader)) { return countWords(buffer.lines()); } catch (IOException e) { throw new IllegalStateException(e); } } private static Map<String, Integer> countWords(Stream<String> lines) { Map<String, Integer> dictionary = new HashMap<>(); BiFunction<String, Integer, Integer> increment = (key, count) -> count == null ? 1 : ++count; Consumer<String> count = word -> dictionary.compute(word.toLowerCase(Locale.ENGLISH), increment); Pattern words = Pattern.compile("[^a-zA-Z]++"); lines.forEach(line -> words.splitAsStream(line).forEach(count)); return Collections.unmodifiableMap(dictionary); } }
No comments:
Post a Comment
All comments are moderated