diff options
author | Marko Zajc <marko@zajc.eu.org> | 2023-07-15 23:32:00 +0200 |
---|---|---|
committer | Marko Zajc <marko@zajc.eu.org> | 2023-08-02 00:58:52 +0200 |
commit | 7c0eec775f623f6ba6fb6c0bc3af3fc59c22fb46 (patch) | |
tree | 91c433a82d63d14d518a0c7ca4b41f8be1b4dae7 | |
parent | 6d3025efc82380827950767a7cb63543cfab82df (diff) |
[module-utilities] Rewrite ChatbotCommand.java
-rw-r--r-- | module-utilities/pom.xml | 16 | ||||
-rw-r--r-- | module-utilities/src/main/java/libot/commands/ChatbotCommand.java | 171 |
2 files changed, 147 insertions, 40 deletions
diff --git a/module-utilities/pom.xml b/module-utilities/pom.xml index 5cc5cae..615ed08 100644 --- a/module-utilities/pom.xml +++ b/module-utilities/pom.xml | |||
@@ -31,7 +31,6 @@ | |||
31 | </scm> | 31 | </scm> |
32 | 32 | ||
33 | <dependencies> | 33 | <dependencies> |
34 | |||
35 | <!-- Cross-module --> | 34 | <!-- Cross-module --> |
36 | <dependency> | 35 | <dependency> |
37 | <groupId>zajc.libot</groupId> | 36 | <groupId>zajc.libot</groupId> |
@@ -42,11 +41,10 @@ | |||
42 | <artifactId>module-libot</artifactId> | 41 | <artifactId>module-libot</artifactId> |
43 | </dependency> | 42 | </dependency> |
44 | 43 | ||
45 | <!-- Chatbot support --> | 44 | <!-- HTTP --> |
46 | <dependency> | 45 | <dependency> |
47 | <groupId>ca.pjer</groupId> | 46 | <groupId>com.konghq</groupId> |
48 | <artifactId>chatter-bot-api</artifactId> | 47 | <artifactId>unirest-java</artifactId> |
49 | <version>2.0.1</version> | ||
50 | </dependency> | 48 | </dependency> |
51 | 49 | ||
52 | <!-- Annotations --> | 50 | <!-- Annotations --> |
@@ -54,7 +52,13 @@ | |||
54 | <groupId>com.github.spotbugs</groupId> | 52 | <groupId>com.github.spotbugs</groupId> |
55 | <artifactId>spotbugs-annotations</artifactId> | 53 | <artifactId>spotbugs-annotations</artifactId> |
56 | </dependency> | 54 | </dependency> |
57 | 55 | ||
56 | <!-- Compression --> | ||
57 | <dependency> | ||
58 | <groupId>org.apache.commons</groupId> | ||
59 | <artifactId>commons-compress</artifactId> | ||
60 | <version>1.23.0</version> | ||
61 | </dependency> | ||
58 | </dependencies> | 62 | </dependencies> |
59 | 63 | ||
60 | <build> | 64 | <build> |
diff --git a/module-utilities/src/main/java/libot/commands/ChatbotCommand.java b/module-utilities/src/main/java/libot/commands/ChatbotCommand.java index e036ba9..3bd223e 100644 --- a/module-utilities/src/main/java/libot/commands/ChatbotCommand.java +++ b/module-utilities/src/main/java/libot/commands/ChatbotCommand.java | |||
@@ -1,69 +1,172 @@ | |||
1 | package libot.commands; | 1 | package libot.commands; |
2 | 2 | ||
3 | import static com.google.code.chatterbotapi.ChatterBotType.PANDORABOTS; | 3 | import static com.github.markozajc.ef.EHandle.handle; |
4 | import static java.util.regex.Pattern.compile; | 4 | import static java.util.regex.Pattern.compile; |
5 | import static javax.xml.xpath.XPathConstants.STRING; | ||
5 | import static libot.core.Constants.*; | 6 | import static libot.core.Constants.*; |
6 | import static libot.core.commands.CommandCategory.UTILITIES; | 7 | import static libot.core.commands.CommandCategory.UTILITIES; |
7 | import static libot.utils.Utilities.array; | 8 | import static libot.utils.Utilities.array; |
8 | import static org.slf4j.LoggerFactory.getLogger; | 9 | import static org.apache.commons.lang3.tuple.Pair.of; |
9 | 10 | ||
10 | import java.util.regex.Pattern; | 11 | import java.io.*; |
12 | import java.util.*; | ||
13 | import java.util.concurrent.Callable; | ||
14 | import java.util.regex.*; | ||
11 | 15 | ||
12 | import org.slf4j.Logger; | 16 | import javax.annotation.*; |
17 | import javax.xml.parsers.*; | ||
18 | import javax.xml.xpath.*; | ||
13 | 19 | ||
14 | import com.google.code.chatterbotapi.*; | 20 | import org.apache.commons.lang3.StringUtils; |
21 | import org.apache.commons.lang3.tuple.Pair; | ||
22 | import org.xml.sax.SAXException; | ||
15 | 23 | ||
24 | import kong.unirest.Unirest; | ||
16 | import libot.core.commands.*; | 25 | import libot.core.commands.*; |
17 | import libot.core.entities.CommandContext; | 26 | import libot.core.entities.CommandContext; |
18 | 27 | ||
28 | @SuppressWarnings("java:S4248") // false positive spam (non-static pattern) | ||
19 | public class ChatbotCommand extends Command { | 29 | public class ChatbotCommand extends Command { |
20 | 30 | ||
21 | private static final String CHOMSKY_ID = "b0dafd24ee35a477"; | 31 | private static final String PANDORABOTS_EMOJI = "<:pandorabots:1129815071556653067>"; |
22 | private static final Pattern XML_REGEX = compile("<(.*?)((?= \\/>)|>)"); | 32 | // nbsp used in place of spaces on the following line because ecj doesn't like |
23 | private static final Logger LOG = getLogger(ChatbotCommand.class); | 33 | // codepoints in multiline strings |
24 | private static final ChatterBot CHATTER_BOT; | 34 | // https://github.com/eclipse-jdt/eclipse.jdt.core/issues/1237 |
35 | private static final String NAME = "**Chomsky [Chatbot %s]**".formatted(PANDORABOTS_EMOJI); // NOSONAR it stays | ||
36 | private static final String BOT_ID = "b0dafd24ee35a477"; | ||
37 | |||
38 | private static final String LEARN_TEXT = "Would you like to teach me a new question and answer?"; | ||
39 | private static final String LEARN_RESPONSE = "I'm sorry, but learning is disabled in this session"; | ||
40 | private static final String LEARN_SUGGESTION = "If you would like to teach me a better reply, just say \"Learn\"."; | ||
41 | |||
42 | private static final DocumentBuilder DOCUMENT_BUILDER; // implementation isn't thread safe | ||
43 | private static final XPathExpression XPATH_EXTRACTOR; // explicitly not thread safe | ||
44 | |||
45 | private static final List<Pair<Pattern, String>> RESPONSE_PARSERS_RECURSIVE = new ArrayList<>(); | ||
46 | private static final List<Pair<Pattern, String>> RESPONSE_PARSERS = new ArrayList<>(); | ||
25 | 47 | ||
26 | static { | 48 | static { |
27 | ChatterBot chatterBot; | 49 | var dbf = DocumentBuilderFactory.newInstance(); |
28 | try { | 50 | DOCUMENT_BUILDER = handle((Callable<DocumentBuilder>) dbf::newDocumentBuilder, e -> null).get(); |
29 | chatterBot = new ChatterBotFactory().create(PANDORABOTS, CHOMSKY_ID); | 51 | |
30 | } catch (Exception e) { | 52 | var xp = XPathFactory.newInstance().newXPath(); |
31 | chatterBot = null; | 53 | XPATH_EXTRACTOR = handle((Callable<XPathExpression>) () -> xp.compile("//result/that/text()"), e -> null).get(); |
32 | LOG.error("Failed to load the chatbot", e); | 54 | |
33 | } | 55 | // useless data, inner data discarded |
56 | RESPONSE_PARSERS.add(of(compile("(?s)<object[^>]*>.*?</object>"), "")); // NOSONAR | ||
57 | RESPONSE_PARSERS.add(of(compile("(?s)<param[^>]*>.*?</param>"), "")); | ||
58 | RESPONSE_PARSERS.add(of(compile("(?s)<embed[^>]*>.*?</embed>"), "")); | ||
59 | RESPONSE_PARSERS.add(of(compile("(?s)<script[^>]*>.*?</script>"), "")); | ||
60 | RESPONSE_PARSERS.add(of(compile("(?s)<style[^>]*>.*?</style>"), "")); | ||
61 | |||
62 | // useless tags, inner data kept | ||
63 | RESPONSE_PARSERS_RECURSIVE.add(of(compile("(?s)<font[^>]*>(.*?)</font>"), "$1")); | ||
64 | RESPONSE_PARSERS_RECURSIVE.add(of(compile("(?s)<div[^>]*>(.*?)</div>"), "$1\n")); | ||
65 | |||
66 | // html we can't turn into markdown but we still change | ||
67 | RESPONSE_PARSERS.add(of(compile("<a href=\"([^\"]*)\"[^>]*>(.*?)<\\/a>"), "$2 (<$1>)")); | ||
68 | RESPONSE_PARSERS.add(of(compile("<br> *"), "\n")); | ||
69 | |||
70 | // html we can turn into markdown | ||
71 | RESPONSE_PARSERS_RECURSIVE.add(of(compile("<b>(.*?)</b>"), "**$1**")); | ||
72 | RESPONSE_PARSERS_RECURSIVE.add(of(compile("<i>(.*?)</i>"), "*$1*")); | ||
73 | RESPONSE_PARSERS_RECURSIVE.add(of(compile("<u>(.*?)</u>"), "__$1__")); | ||
34 | 74 | ||
35 | CHATTER_BOT = chatterBot; | 75 | // botched punctuation |
76 | RESPONSE_PARSERS.add(of(compile(" {2,}"), " ")); | ||
77 | RESPONSE_PARSERS.add(of(compile(" +(?=[\\.,!?])"), "")); | ||
78 | |||
79 | // STATIC_RESPONSES.put(q -> q.startsWith("web search"), ) | ||
36 | } | 80 | } |
37 | 81 | ||
38 | @Override | 82 | @Override |
39 | @SuppressWarnings("null") | ||
40 | public void execute(CommandContext c) throws Exception { | 83 | public void execute(CommandContext c) throws Exception { |
41 | if (CHATTER_BOT == null) | 84 | var session = generateSession(); |
42 | throw c.error("This feature is currently not available. Please try again later!", DISABLED); | ||
43 | 85 | ||
44 | var session = CHATTER_BOT.createSession(); | 86 | c.replyf("You're connected!", """ |
45 | 87 | You can now start chatting with %s. Say hi! | |
46 | c.reply("You're connected!", """ | 88 | Also, if you want Chomsky to ignore a message, prefix it with `#` (eg. `# this message is \ |
47 | You can now start chatting with Chomsky. Say hi! | 89 | ignored`).""", "Type in EXIT to quit", SUCCESS, NAME); |
48 | Also, if you want Chomsky to ignore a message, prefix it with `>` (eg. `> this message is \ | ||
49 | ignored`).""", "Type in EXIT to quit", SUCCESS); | ||
50 | 90 | ||
51 | while (true) { | 91 | while (true) { |
52 | var m = c.askraw(); | 92 | var m = c.askraw(); |
53 | if ("exit".equalsIgnoreCase(m.getContentStripped())) { | 93 | if ("exit".equalsIgnoreCase(m.getContentStripped())) { |
54 | m.addReaction(ACCEPT_EMOJI).queue(); | 94 | m.addReaction(ACCEPT_EMOJI).queue(); |
55 | break; | 95 | break; |
56 | } | ||
57 | 96 | ||
58 | if (!m.getContentRaw().startsWith(">")) { | 97 | } else if (!m.getContentRaw().startsWith("#")) { |
59 | String reply = session.think(m.getContentStripped()); | 98 | c.replyf("%s: %s", NAME, think(session, m.getContentStripped())); |
60 | if (reply.length() == 0) | ||
61 | c.reply("..."); | ||
62 | else | ||
63 | c.reply(XML_REGEX.matcher(reply).replaceAll("")); | ||
64 | } | 99 | } |
65 | } | 100 | } |
101 | } | ||
102 | |||
103 | @Nonnull | ||
104 | @SuppressWarnings("null") | ||
105 | private static String generateSession() { | ||
106 | return UUID.randomUUID().toString(); | ||
107 | } | ||
108 | |||
109 | @Nonnull | ||
110 | public static String think(@Nonnull String text, @Nonnull String session) throws XPathExpressionException, | ||
111 | SAXException, IOException { | ||
112 | if (text.equals("learn") || text.startsWith("learn ")) // return the replacement response | ||
113 | return LEARN_RESPONSE; | ||
114 | |||
115 | var response = getResponse(text, session); | ||
116 | if (response == null) { | ||
117 | return "..."; | ||
118 | |||
119 | } else if (response.equals(LEARN_TEXT)) { | ||
120 | getResponse("no", session); // cancel the prompt and return the replacement response | ||
121 | return LEARN_RESPONSE; | ||
122 | |||
123 | } else if (response.equals(LEARN_SUGGESTION)) { | ||
124 | return think("ok", session); // change the query | ||
125 | } | ||
126 | |||
127 | response = parseResponse(response); | ||
128 | if (response.isEmpty()) | ||
129 | return "..."; | ||
130 | else | ||
131 | return response; | ||
132 | } | ||
133 | |||
134 | @Nullable | ||
135 | @SuppressWarnings("null") | ||
136 | private static String getResponse(@Nonnull String text, @Nonnull String session) throws SAXException, IOException, | ||
137 | XPathExpressionException { | ||
138 | var bytes = Unirest.post("https://www.pandorabots.com/pandora/talk-xml") | ||
139 | .field("botid", BOT_ID) | ||
140 | .field("custid", session) | ||
141 | .field("input", text) | ||
142 | .asBytes() | ||
143 | .mapBody(ByteArrayInputStream::new); | ||
144 | |||
145 | return extractResponse(bytes); | ||
146 | } | ||
147 | |||
148 | @Nonnull | ||
149 | @SuppressWarnings("null") | ||
150 | private static String parseResponse(@Nonnull String response) { | ||
151 | var parsedResponse = response; | ||
152 | for (var parser : RESPONSE_PARSERS_RECURSIVE) { | ||
153 | Matcher m; | ||
154 | while ((m = parser.getKey().matcher(parsedResponse)).find()) // recursive replaceAll | ||
155 | parsedResponse = m.replaceAll(parser.getValue()); | ||
156 | } | ||
157 | |||
158 | for (var parser : RESPONSE_PARSERS) | ||
159 | parsedResponse = parser.getKey().matcher(parsedResponse).replaceAll(parser.getValue()); | ||
160 | |||
161 | return StringUtils.capitalize(parsedResponse.trim()); | ||
162 | } | ||
66 | 163 | ||
164 | @Nullable | ||
165 | private static synchronized String extractResponse(@Nonnull ByteArrayInputStream response) throws SAXException, | ||
166 | IOException, | ||
167 | XPathExpressionException { | ||
168 | var doc = DOCUMENT_BUILDER.parse(response); | ||
169 | return (String) XPATH_EXTRACTOR.evaluate(doc, STRING); | ||
67 | } | 170 | } |
68 | 171 | ||
69 | @Override | 172 | @Override |
@@ -80,7 +183,7 @@ public class ChatbotCommand extends Command { | |||
80 | public String getInfo() { | 183 | public String getInfo() { |
81 | return """ | 184 | return """ |
82 | Opens a chat session with \ | 185 | Opens a chat session with \ |
83 | [Chomsky](http://demo.vhost.pandorabots.com/pandora/talk?botid=b0dafd24ee35a477)."""; | 186 | [Chomsky](http://demo.vhost.pandorabots.com/pandora/talk?botid=b0dafd24ee35a477), the online chatbot."""; |
84 | } | 187 | } |
85 | 188 | ||
86 | @Override | 189 | @Override |