This commit is contained in:
Zlatin Balevsky
2019-11-06 05:42:14 +00:00
10 changed files with 176 additions and 18 deletions

View File

@@ -49,10 +49,7 @@ class SearchModel {
searchEvent = new SearchEvent(searchHash : root, uuid : UUID.randomUUID(), oobInfohash : true, compressedResults : true)
payload = root
} else {
def replaced = query.toLowerCase().trim().replaceAll(SplitPattern.SPLIT_PATTERN, " ")
def terms = replaced.split(" ")
def nonEmpty = []
terms.each { if (it.length() > 0) nonEmpty << it }
def nonEmpty = SplitPattern.termify(query)
payload = String.join(" ", nonEmpty).getBytes(StandardCharsets.UTF_8)
searchEvent = new SearchEvent(searchTerms : nonEmpty, uuid : UUID.randomUUID(), oobInfohash: true,
searchComments : core.muOptions.searchComments, compressedResults : true)

View File

@@ -3,5 +3,89 @@ package com.muwire.core
class SplitPattern {
public static final String SPLIT_PATTERN = "[\\*\\+\\-,\\.:;\\(\\)=_/\\\\\\!\\\"\\\'\\\$%\\|\\[\\]\\{\\}\\?]";
private static final Set<Character> SPLIT_CHARS = new HashSet<>()
static {
SPLIT_CHARS.with {
add(' '.toCharacter())
add('*'.toCharacter())
add('+'.toCharacter())
add('-'.toCharacter())
add(','.toCharacter())
add('.'.toCharacter())
add(':'.toCharacter())
add(';'.toCharacter())
add('('.toCharacter())
add(')'.toCharacter())
add('='.toCharacter())
add('_'.toCharacter())
add('/'.toCharacter())
add('\\'.toCharacter())
add('!'.toCharacter())
add('\''.toCharacter())
add('$'.toCharacter())
add('%'.toCharacter())
add('|'.toCharacter())
add('['.toCharacter())
add(']'.toCharacter())
add('{'.toCharacter())
add('}'.toCharacter())
add('?'.toCharacter())
}
}
public static String[] termify(final String source) {
String lowercase = source.toLowerCase().trim()
def rv = []
int pos = 0
int quote = -1
StringBuilder tmp = new StringBuilder()
while(pos < lowercase.length()) {
char c = lowercase.charAt(pos++)
if (quote < 0 && c == '"') {
quote = pos - 1
continue
}
if (quote >= 0) {
if (c == '"') {
quote = -1
if (tmp.length() != 0) {
rv << tmp.toString()
tmp = new StringBuilder()
}
} else
tmp.append(c)
} else if (SPLIT_CHARS.contains(c)) {
if (tmp.length() != 0) {
rv << tmp.toString()
tmp = new StringBuilder()
}
} else
tmp.append c
}
// check if odd number of quotes and re-tokenize from last quote
if (quote >= 0) {
tmp = new StringBuilder()
pos = quote + 1
while(pos < lowercase.length()) {
char c = lowercase.charAt(pos++)
if (SPLIT_CHARS.contains(c)) {
if (tmp.length() > 0) {
rv << tmp.toString()
tmp = new StringBuilder()
}
} else
tmp.append(c)
}
}
if (tmp.length() > 0)
rv << tmp.toString()
rv
}
}

View File

@@ -159,6 +159,7 @@ class ConnectionAcceptor {
}
} catch (Exception ex) {
log.log(Level.WARNING, "incoming connection failed",ex)
e.getOutputStream().close()
e.close()
eventBus.publish new ConnectionEvent(endpoint: e, incoming: true, leaf: null, status: ConnectionAttemptStatus.FAILED)
}
@@ -207,7 +208,7 @@ class ConnectionAcceptor {
os.writeShort(json.bytes.length)
os.write(json.bytes)
}
e.outputStream.flush()
e.outputStream.close()
e.close()
eventBus.publish(new ConnectionEvent(endpoint: e, incoming: true, leaf: leaf, status: ConnectionAttemptStatus.REJECTED))
}

View File

@@ -31,25 +31,48 @@ class SearchIndex {
}
}
private static String[] split(String source) {
source = source.replaceAll(SplitPattern.SPLIT_PATTERN, " ").toLowerCase()
String [] split = source.split(" ")
private static String[] split(final String source) {
// first split by split pattern
String sourceSplit = source.replaceAll(SplitPattern.SPLIT_PATTERN, " ").toLowerCase()
String [] split = sourceSplit.split(" ")
def rv = []
split.each { if (it.length() > 0) rv << it }
// then just by ' '
source.split(' ').each { if (it.length() > 0) rv << it }
// and add original string
rv << source
rv.toArray(new String[0])
}
String[] search(List<String> terms) {
Set<String> rv = null;
Set<String> powerSet = new HashSet<>()
terms.each {
powerSet.addAll(it.toLowerCase().split(' '))
}
powerSet.each {
Set<String> forWord = keywords.getOrDefault(it,[])
if (rv == null) {
rv = new HashSet<>(forWord)
} else {
rv.retainAll(forWord)
}
}
// now, filter by terms
for (Iterator<String> iter = rv.iterator(); iter.hasNext();) {
String candidate = iter.next()
candidate = candidate.toLowerCase()
boolean keep = true
terms.each {
keep &= candidate.contains(it)
}
if (!keep)
iter.remove()
}
if (rv != null)

View File

@@ -0,0 +1,27 @@
package com.muwire.core
import org.junit.Test
class SplitPatternTest {
@Test
void testReplaceCharacters() {
assert SplitPattern.termify("a_b.c") == ['a','b','c']
}
@Test
void testPhrase() {
assert SplitPattern.termify('"siamese cat"') == ['siamese cat']
}
@Test
void testInvalidPhrase() {
assert SplitPattern.termify('"siamese cat') == ['siamese', 'cat']
}
@Test
void testManyPhrases() {
assert SplitPattern.termify('"siamese cat" any cat "persian cat"') ==
['siamese cat','any','cat','persian cat']
}
}

View File

@@ -95,7 +95,7 @@ class ConnectionAcceptorTest {
connectionEstablisher = connectionEstablisherMock.proxyInstance()
acceptor = new ConnectionAcceptor(eventBus, connectionManager, settings, i2pAcceptor,
hostCache, trustService, searchManager, uploadManager, null, connectionEstablisher)
hostCache, trustService, searchManager, uploadManager, null, connectionEstablisher, null)
acceptor.start()
Thread.sleep(100)
}

View File

@@ -149,7 +149,7 @@ class FileManagerTest {
manager.onFileLoadedEvent new FileLoadedEvent(loadedFile : sf1)
manager.onFileLoadedEvent new FileLoadedEvent(loadedFile : sf2)
manager.onFileUnsharedEvent new FileUnsharedEvent(unsharedFile: sf2)
manager.onFileUnsharedEvent new FileUnsharedEvent(deleted : true, unsharedFile: sf2)
manager.onSearchEvent new SearchEvent(searchHash : ih.getRoot())
Thread.sleep(20)
@@ -170,7 +170,7 @@ class FileManagerTest {
SharedFile sf2 = new SharedFile(f2, ih2, 0)
manager.onFileLoadedEvent new FileLoadedEvent(loadedFile: sf2)
manager.onFileUnsharedEvent new FileUnsharedEvent(unsharedFile: sf2)
manager.onFileUnsharedEvent new FileUnsharedEvent(deleted : true, unsharedFile: sf2)
// 1 match left
manager.onSearchEvent new SearchEvent(searchTerms: ["c"])

View File

@@ -90,4 +90,34 @@ class SearchIndexTest {
def found = index.search(["muwire", "0", "3", "jar"])
assert found.size() == 1
}
@Test
void testOriginalText() {
initIndex(["a-b c-d"])
def found = index.search(['a-b'])
assert found.size() == 1
found = index.search(['c-d'])
assert found.size() == 1
}
@Test
void testPhrase() {
initIndex(["a-b c-d e-f"])
def found = index.search(['a-b c-d'])
assert found.size() == 1
assert index.search(['c-d e-f']).size() == 1
assert index.search(['a-b e-f']).size() == 0
}
@Test
void testMixedPhraseAndKeyword() {
initIndex(["My siamese cat video",
"My cat video of a siamese",
"Video of a siamese cat"])
assert index.search(['cat video']).size() == 2
assert index.search(['cat video','siamese']).size() == 2
assert index.search(['cat', 'video siamese']).size() == 0
assert index.search(['cat','video','siamese']).size() == 3
}
}

View File

@@ -107,11 +107,7 @@ class MainFrameController {
searchEvent = new SearchEvent(searchHash : root, uuid : uuid, oobInfohash: true, compressedResults : true)
payload = root
} else {
// this can be improved a lot
def replaced = search.toLowerCase().trim().replaceAll(SplitPattern.SPLIT_PATTERN, " ")
def terms = replaced.split(" ")
def nonEmpty = []
terms.each { if (it.length() > 0) nonEmpty << it }
def nonEmpty = SplitPattern.termify(search)
payload = String.join(" ",nonEmpty).getBytes(StandardCharsets.UTF_8)
searchEvent = new SearchEvent(searchTerms : nonEmpty, uuid : uuid, oobInfohash: true,
searchComments : core.muOptions.searchComments, compressedResults : true)

View File

@@ -33,7 +33,7 @@ class CertificateWarningView {
label(text : "Even if you delete the certificate from your disk, others may already have it.", constraints : gbc(gridx:0, gridy : 1, gridwidth: 2))
label(text : "If you are sure you want to do this, check the checkbox below, then click \"Certify\" again.", constraints : gbc(gridx:0, gridy: 2, gridwidth:2))
label(text : "\n", constraints : gbc(gridx:0, gridy:3)) // TODO: real padding
label(text : "I understand, do not show this warning again", constraints : gbc(gridx:0, gridy:4, anchor : GridBagConstraints.LINE_END))
label(text : " I understand, do not show this warning again", constraints : gbc(gridx:0, gridy:4, anchor : GridBagConstraints.LINE_END))
checkbox = checkBox(constraints : gbc(gridx:1, gridy:4, anchor : GridBagConstraints.LINE_START))
panel (constraints : gbc(gridx :0, gridy : 5, gridwidth : 2)) {
button(text : "Ok", dismissAction)