From a9aad7d9db1250ce5d8f0e822b3c3d8c93b9de74 Mon Sep 17 00:00:00 2001 From: Zlatin Balevsky Date: Tue, 5 Nov 2019 12:57:16 +0000 Subject: [PATCH 1/6] test with deleted files --- .../test/groovy/com/muwire/core/files/FileManagerTest.groovy | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/test/groovy/com/muwire/core/files/FileManagerTest.groovy b/core/src/test/groovy/com/muwire/core/files/FileManagerTest.groovy index d6cf9938..dc891ec7 100644 --- a/core/src/test/groovy/com/muwire/core/files/FileManagerTest.groovy +++ b/core/src/test/groovy/com/muwire/core/files/FileManagerTest.groovy @@ -149,7 +149,7 @@ class FileManagerTest { manager.onFileLoadedEvent new FileLoadedEvent(loadedFile : sf1) manager.onFileLoadedEvent new FileLoadedEvent(loadedFile : sf2) - manager.onFileUnsharedEvent new FileUnsharedEvent(unsharedFile: sf2) + manager.onFileUnsharedEvent new FileUnsharedEvent(deleted : true, unsharedFile: sf2) manager.onSearchEvent new SearchEvent(searchHash : ih.getRoot()) Thread.sleep(20) @@ -170,7 +170,7 @@ class FileManagerTest { SharedFile sf2 = new SharedFile(f2, ih2, 0) manager.onFileLoadedEvent new FileLoadedEvent(loadedFile: sf2) - manager.onFileUnsharedEvent new FileUnsharedEvent(unsharedFile: sf2) + manager.onFileUnsharedEvent new FileUnsharedEvent(deleted : true, unsharedFile: sf2) // 1 match left manager.onSearchEvent new SearchEvent(searchTerms: ["c"]) From 7e881f1fe6d9447a913060f201e7a9cd2dae1006 Mon Sep 17 00:00:00 2001 From: Zlatin Balevsky Date: Tue, 5 Nov 2019 12:57:52 +0000 Subject: [PATCH 2/6] close() output streams on rejection, update test --- .../com/muwire/core/connection/ConnectionAcceptor.groovy | 3 ++- .../com/muwire/core/connection/ConnectionAcceptorTest.groovy | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/core/src/main/groovy/com/muwire/core/connection/ConnectionAcceptor.groovy b/core/src/main/groovy/com/muwire/core/connection/ConnectionAcceptor.groovy index 7b19a98d..d1df2abe 100644 --- a/core/src/main/groovy/com/muwire/core/connection/ConnectionAcceptor.groovy +++ b/core/src/main/groovy/com/muwire/core/connection/ConnectionAcceptor.groovy @@ -159,6 +159,7 @@ class ConnectionAcceptor { } } catch (Exception ex) { log.log(Level.WARNING, "incoming connection failed",ex) + e.getOutputStream().close() e.close() eventBus.publish new ConnectionEvent(endpoint: e, incoming: true, leaf: null, status: ConnectionAttemptStatus.FAILED) } @@ -207,7 +208,7 @@ class ConnectionAcceptor { os.writeShort(json.bytes.length) os.write(json.bytes) } - e.outputStream.flush() + e.outputStream.close() e.close() eventBus.publish(new ConnectionEvent(endpoint: e, incoming: true, leaf: leaf, status: ConnectionAttemptStatus.REJECTED)) } diff --git a/core/src/test/groovy/com/muwire/core/connection/ConnectionAcceptorTest.groovy b/core/src/test/groovy/com/muwire/core/connection/ConnectionAcceptorTest.groovy index 6cd4589e..fc052f2b 100644 --- a/core/src/test/groovy/com/muwire/core/connection/ConnectionAcceptorTest.groovy +++ b/core/src/test/groovy/com/muwire/core/connection/ConnectionAcceptorTest.groovy @@ -95,7 +95,7 @@ class ConnectionAcceptorTest { connectionEstablisher = connectionEstablisherMock.proxyInstance() acceptor = new ConnectionAcceptor(eventBus, connectionManager, settings, i2pAcceptor, - hostCache, trustService, searchManager, uploadManager, null, connectionEstablisher) + hostCache, trustService, searchManager, uploadManager, null, connectionEstablisher, null) acceptor.start() Thread.sleep(100) } From 8dcba7535c7a75517d29a9a7e47d7066eeec3149 Mon Sep 17 00:00:00 2001 From: Zlatin Balevsky Date: Tue, 5 Nov 2019 13:24:22 +0000 Subject: [PATCH 3/6] modify indexing and search logic to account for phrases --- .../com/muwire/core/search/SearchIndex.groovy | 31 ++++++++++++++++--- .../muwire/core/search/SearchIndexTest.groovy | 18 +++++++++++ 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/core/src/main/groovy/com/muwire/core/search/SearchIndex.groovy b/core/src/main/groovy/com/muwire/core/search/SearchIndex.groovy index 86b5b5fd..3e87a762 100644 --- a/core/src/main/groovy/com/muwire/core/search/SearchIndex.groovy +++ b/core/src/main/groovy/com/muwire/core/search/SearchIndex.groovy @@ -31,25 +31,48 @@ class SearchIndex { } } - private static String[] split(String source) { - source = source.replaceAll(SplitPattern.SPLIT_PATTERN, " ").toLowerCase() - String [] split = source.split(" ") + private static String[] split(final String source) { + // first split by split pattern + String sourceSplit = source.replaceAll(SplitPattern.SPLIT_PATTERN, " ").toLowerCase() + String [] split = sourceSplit.split(" ") def rv = [] split.each { if (it.length() > 0) rv << it } + + // then just by ' ' + source.split(' ').each { if (it.length() > 0) rv << it } + + // and add original string + rv << source rv.toArray(new String[0]) } String[] search(List terms) { Set rv = null; + Set powerSet = new HashSet<>() terms.each { + powerSet.addAll(it.toLowerCase().split(' ')) + } + + powerSet.each { Set forWord = keywords.getOrDefault(it,[]) if (rv == null) { rv = new HashSet<>(forWord) } else { rv.retainAll(forWord) } - + } + + // now, filter by terms + for (Iterator iter = rv.iterator(); iter.hasNext();) { + String candidate = iter.next() + candidate = candidate.toLowerCase() + boolean keep = true + terms.each { + keep &= candidate.contains(it) + } + if (!keep) + iter.remove() } if (rv != null) diff --git a/core/src/test/groovy/com/muwire/core/search/SearchIndexTest.groovy b/core/src/test/groovy/com/muwire/core/search/SearchIndexTest.groovy index 03264808..6cae8645 100644 --- a/core/src/test/groovy/com/muwire/core/search/SearchIndexTest.groovy +++ b/core/src/test/groovy/com/muwire/core/search/SearchIndexTest.groovy @@ -90,4 +90,22 @@ class SearchIndexTest { def found = index.search(["muwire", "0", "3", "jar"]) assert found.size() == 1 } + + @Test + void testOriginalText() { + initIndex(["a-b c-d"]) + def found = index.search(['a-b']) + assert found.size() == 1 + found = index.search(['c-d']) + assert found.size() == 1 + } + + @Test + void testPhrase() { + initIndex(["a-b c-d e-f"]) + def found = index.search(['a-b c-d']) + assert found.size() == 1 + assert index.search(['c-d e-f']).size() == 1 + assert index.search(['a-b e-f']).size() == 0 + } } From b865376d248e021b26c541989cbdde78a2369a22 Mon Sep 17 00:00:00 2001 From: Zlatin Balevsky Date: Tue, 5 Nov 2019 14:41:27 +0000 Subject: [PATCH 4/6] more tests --- .../com/muwire/core/search/SearchIndexTest.groovy | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/core/src/test/groovy/com/muwire/core/search/SearchIndexTest.groovy b/core/src/test/groovy/com/muwire/core/search/SearchIndexTest.groovy index 6cae8645..6d80052c 100644 --- a/core/src/test/groovy/com/muwire/core/search/SearchIndexTest.groovy +++ b/core/src/test/groovy/com/muwire/core/search/SearchIndexTest.groovy @@ -108,4 +108,16 @@ class SearchIndexTest { assert index.search(['c-d e-f']).size() == 1 assert index.search(['a-b e-f']).size() == 0 } + + @Test + void testMixedPhraseAndKeyword() { + initIndex(["My siamese cat video", + "My cat video of a siamese", + "Video of a siamese cat"]) + + assert index.search(['cat video']).size() == 2 + assert index.search(['cat video','siamese']).size() == 2 + assert index.search(['cat', 'video siamese']).size() == 0 + assert index.search(['cat','video','siamese']).size() == 3 + } } From 9feb891c51b2a027c890845a9f06e734ac271930 Mon Sep 17 00:00:00 2001 From: Zlatin Balevsky Date: Tue, 5 Nov 2019 15:52:23 +0000 Subject: [PATCH 5/6] support phrases in search --- .../com/muwire/clilanterna/SearchModel.groovy | 5 +- .../com/muwire/core/SplitPattern.groovy | 84 +++++++++++++++++++ .../com/muwire/core/SplitPatternTest.groovy | 27 ++++++ .../com/muwire/gui/MainFrameController.groovy | 6 +- 4 files changed, 113 insertions(+), 9 deletions(-) create mode 100644 core/src/test/groovy/com/muwire/core/SplitPatternTest.groovy diff --git a/cli-lanterna/src/main/groovy/com/muwire/clilanterna/SearchModel.groovy b/cli-lanterna/src/main/groovy/com/muwire/clilanterna/SearchModel.groovy index 9ec69b50..573a82ad 100644 --- a/cli-lanterna/src/main/groovy/com/muwire/clilanterna/SearchModel.groovy +++ b/cli-lanterna/src/main/groovy/com/muwire/clilanterna/SearchModel.groovy @@ -49,10 +49,7 @@ class SearchModel { searchEvent = new SearchEvent(searchHash : root, uuid : UUID.randomUUID(), oobInfohash : true, compressedResults : true) payload = root } else { - def replaced = query.toLowerCase().trim().replaceAll(SplitPattern.SPLIT_PATTERN, " ") - def terms = replaced.split(" ") - def nonEmpty = [] - terms.each { if (it.length() > 0) nonEmpty << it } + def nonEmpty = SplitPattern.termify(query) payload = String.join(" ", nonEmpty).getBytes(StandardCharsets.UTF_8) searchEvent = new SearchEvent(searchTerms : nonEmpty, uuid : UUID.randomUUID(), oobInfohash: true, searchComments : core.muOptions.searchComments, compressedResults : true) diff --git a/core/src/main/groovy/com/muwire/core/SplitPattern.groovy b/core/src/main/groovy/com/muwire/core/SplitPattern.groovy index 0908d045..606c903a 100644 --- a/core/src/main/groovy/com/muwire/core/SplitPattern.groovy +++ b/core/src/main/groovy/com/muwire/core/SplitPattern.groovy @@ -3,5 +3,89 @@ package com.muwire.core class SplitPattern { public static final String SPLIT_PATTERN = "[\\*\\+\\-,\\.:;\\(\\)=_/\\\\\\!\\\"\\\'\\\$%\\|\\[\\]\\{\\}\\?]"; + + private static final Set SPLIT_CHARS = new HashSet<>() + static { + SPLIT_CHARS.with { + add(' '.toCharacter()) + add('*'.toCharacter()) + add('+'.toCharacter()) + add('-'.toCharacter()) + add(','.toCharacter()) + add('.'.toCharacter()) + add(':'.toCharacter()) + add(';'.toCharacter()) + add('('.toCharacter()) + add(')'.toCharacter()) + add('='.toCharacter()) + add('_'.toCharacter()) + add('/'.toCharacter()) + add('\\'.toCharacter()) + add('!'.toCharacter()) + add('\''.toCharacter()) + add('$'.toCharacter()) + add('%'.toCharacter()) + add('|'.toCharacter()) + add('['.toCharacter()) + add(']'.toCharacter()) + add('{'.toCharacter()) + add('}'.toCharacter()) + add('?'.toCharacter()) + } + } + + public static String[] termify(final String source) { + String lowercase = source.toLowerCase().trim() + + def rv = [] + int pos = 0 + int quote = -1 + + StringBuilder tmp = new StringBuilder() + while(pos < lowercase.length()) { + char c = lowercase.charAt(pos++) + if (quote < 0 && c == '"') { + quote = pos - 1 + continue + } + if (quote >= 0) { + if (c == '"') { + quote = -1 + if (tmp.length() != 0) { + rv << tmp.toString() + tmp = new StringBuilder() + } + } else + tmp.append(c) + } else if (SPLIT_CHARS.contains(c)) { + if (tmp.length() != 0) { + rv << tmp.toString() + tmp = new StringBuilder() + } + } else + tmp.append c + } + + // check if odd number of quotes and re-tokenize from last quote + if (quote >= 0) { + tmp = new StringBuilder() + pos = quote + 1 + while(pos < lowercase.length()) { + char c = lowercase.charAt(pos++) + if (SPLIT_CHARS.contains(c)) { + if (tmp.length() > 0) { + rv << tmp.toString() + tmp = new StringBuilder() + } + } else + tmp.append(c) + } + } + + if (tmp.length() > 0) + rv << tmp.toString() + + rv + } } diff --git a/core/src/test/groovy/com/muwire/core/SplitPatternTest.groovy b/core/src/test/groovy/com/muwire/core/SplitPatternTest.groovy new file mode 100644 index 00000000..10853dcd --- /dev/null +++ b/core/src/test/groovy/com/muwire/core/SplitPatternTest.groovy @@ -0,0 +1,27 @@ +package com.muwire.core + +import org.junit.Test + +class SplitPatternTest { + + @Test + void testReplaceCharacters() { + assert SplitPattern.termify("a_b.c") == ['a','b','c'] + } + + @Test + void testPhrase() { + assert SplitPattern.termify('"siamese cat"') == ['siamese cat'] + } + + @Test + void testInvalidPhrase() { + assert SplitPattern.termify('"siamese cat') == ['siamese', 'cat'] + } + + @Test + void testManyPhrases() { + assert SplitPattern.termify('"siamese cat" any cat "persian cat"') == + ['siamese cat','any','cat','persian cat'] + } +} diff --git a/gui/griffon-app/controllers/com/muwire/gui/MainFrameController.groovy b/gui/griffon-app/controllers/com/muwire/gui/MainFrameController.groovy index efa3e771..0b932e1a 100644 --- a/gui/griffon-app/controllers/com/muwire/gui/MainFrameController.groovy +++ b/gui/griffon-app/controllers/com/muwire/gui/MainFrameController.groovy @@ -107,11 +107,7 @@ class MainFrameController { searchEvent = new SearchEvent(searchHash : root, uuid : uuid, oobInfohash: true, compressedResults : true) payload = root } else { - // this can be improved a lot - def replaced = search.toLowerCase().trim().replaceAll(SplitPattern.SPLIT_PATTERN, " ") - def terms = replaced.split(" ") - def nonEmpty = [] - terms.each { if (it.length() > 0) nonEmpty << it } + def nonEmpty = SplitPattern.termify(search) payload = String.join(" ",nonEmpty).getBytes(StandardCharsets.UTF_8) searchEvent = new SearchEvent(searchTerms : nonEmpty, uuid : uuid, oobInfohash: true, searchComments : core.muOptions.searchComments, compressedResults : true) From 916fad7d9bbfe3d36241c28b3cae0a248ebd881e Mon Sep 17 00:00:00 2001 From: Zlatin Balevsky Date: Tue, 5 Nov 2019 15:54:16 +0000 Subject: [PATCH 6/6] more fake padding --- .../views/com/muwire/gui/CertificateWarningView.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gui/griffon-app/views/com/muwire/gui/CertificateWarningView.groovy b/gui/griffon-app/views/com/muwire/gui/CertificateWarningView.groovy index 6de89652..fa428e26 100644 --- a/gui/griffon-app/views/com/muwire/gui/CertificateWarningView.groovy +++ b/gui/griffon-app/views/com/muwire/gui/CertificateWarningView.groovy @@ -33,7 +33,7 @@ class CertificateWarningView { label(text : "Even if you delete the certificate from your disk, others may already have it.", constraints : gbc(gridx:0, gridy : 1, gridwidth: 2)) label(text : "If you are sure you want to do this, check the checkbox below, then click \"Certify\" again.", constraints : gbc(gridx:0, gridy: 2, gridwidth:2)) label(text : "\n", constraints : gbc(gridx:0, gridy:3)) // TODO: real padding - label(text : "I understand, do not show this warning again", constraints : gbc(gridx:0, gridy:4, anchor : GridBagConstraints.LINE_END)) + label(text : " I understand, do not show this warning again", constraints : gbc(gridx:0, gridy:4, anchor : GridBagConstraints.LINE_END)) checkbox = checkBox(constraints : gbc(gridx:1, gridy:4, anchor : GridBagConstraints.LINE_START)) panel (constraints : gbc(gridx :0, gridy : 5, gridwidth : 2)) { button(text : "Ok", dismissAction)