Merge branch 'master' of https://github.com/zlatinb/muwire
This commit is contained in:
@@ -49,10 +49,7 @@ class SearchModel {
|
||||
searchEvent = new SearchEvent(searchHash : root, uuid : UUID.randomUUID(), oobInfohash : true, compressedResults : true)
|
||||
payload = root
|
||||
} else {
|
||||
def replaced = query.toLowerCase().trim().replaceAll(SplitPattern.SPLIT_PATTERN, " ")
|
||||
def terms = replaced.split(" ")
|
||||
def nonEmpty = []
|
||||
terms.each { if (it.length() > 0) nonEmpty << it }
|
||||
def nonEmpty = SplitPattern.termify(query)
|
||||
payload = String.join(" ", nonEmpty).getBytes(StandardCharsets.UTF_8)
|
||||
searchEvent = new SearchEvent(searchTerms : nonEmpty, uuid : UUID.randomUUID(), oobInfohash: true,
|
||||
searchComments : core.muOptions.searchComments, compressedResults : true)
|
||||
|
||||
@@ -3,5 +3,89 @@ package com.muwire.core
|
||||
/**
 * Tokenization rules shared by code that turns free text into search terms.
 */
class SplitPattern {

    /** Regular-expression character class matching every punctuation character treated as a term separator. */
    public static final String SPLIT_PATTERN = "[\\*\\+\\-,\\.:;\\(\\)=_/\\\\\\!\\\"\\\'\\\$%\\|\\[\\]\\{\\}\\?]";

    /**
     * Separator characters used by {@link #termify(String)}: a space plus the
     * characters of SPLIT_PATTERN except the double quote, which termify
     * handles separately as the phrase delimiter.
     */
    private static final Set<Character> SPLIT_CHARS = new HashSet<>()
    static {
        for (char c : ' *+-,.:;()=_/\\!\'$%|[]{}?'.toCharArray()) {
            SPLIT_CHARS.add(c)
        }
    }

    /**
     * Breaks a query into lower-case search terms.
     *
     * Text enclosed in a matched pair of double quotes becomes a single term
     * (a phrase) with its inner separators preserved. Everything else is split
     * on the separator characters and empty pieces are discarded. If the last
     * quote has no closing partner, the text after it is tokenized as ordinary
     * keywords.
     *
     * A double quote always ends the token that precedes it, so text glued to
     * the front of a quote is kept as its own term rather than being merged
     * into the phrase or lost when the quote turns out to be unmatched.
     *
     * @param source the raw query; null yields an empty array
     * @return the terms in order of appearance, possibly empty
     */
    public static String[] termify(final String source) {
        if (source == null) {
            return new String[0]
        }
        String lowercase = source.toLowerCase().trim()

        def rv = []
        // index of the currently open quote, or -1 when not inside a phrase
        int quote = -1

        StringBuilder tmp = new StringBuilder()
        for (int pos = 0; pos < lowercase.length(); pos++) {
            char c = lowercase.charAt(pos)
            if (quote >= 0) {
                if (c == '"') {
                    // closing quote: the accumulated phrase is one term
                    quote = -1
                    flush(rv, tmp)
                } else {
                    tmp.append(c)
                }
            } else if (c == '"') {
                // opening quote: finish whatever token was being built so it
                // is neither merged into the phrase nor dropped later
                flush(rv, tmp)
                quote = pos
            } else if (SPLIT_CHARS.contains(c)) {
                flush(rv, tmp)
            } else {
                tmp.append(c)
            }
        }

        // check if odd number of quotes and re-tokenize from last quote
        if (quote >= 0) {
            // tmp only holds text after the unmatched quote at this point
            tmp.setLength(0)
            for (int pos = quote + 1; pos < lowercase.length(); pos++) {
                char c = lowercase.charAt(pos)
                if (SPLIT_CHARS.contains(c)) {
                    flush(rv, tmp)
                } else {
                    tmp.append(c)
                }
            }
        }

        flush(rv, tmp)

        rv as String[]
    }

    /** Appends the buffered token to the result list, if non-empty, and clears the buffer. */
    private static void flush(List rv, StringBuilder tmp) {
        if (tmp.length() > 0) {
            rv << tmp.toString()
            tmp.setLength(0)
        }
    }

}
|
||||
|
||||
@@ -159,6 +159,7 @@ class ConnectionAcceptor {
|
||||
}
|
||||
} catch (Exception ex) {
|
||||
log.log(Level.WARNING, "incoming connection failed",ex)
|
||||
e.getOutputStream().close()
|
||||
e.close()
|
||||
eventBus.publish new ConnectionEvent(endpoint: e, incoming: true, leaf: null, status: ConnectionAttemptStatus.FAILED)
|
||||
}
|
||||
@@ -207,7 +208,7 @@ class ConnectionAcceptor {
|
||||
os.writeShort(json.bytes.length)
|
||||
os.write(json.bytes)
|
||||
}
|
||||
e.outputStream.flush()
|
||||
e.outputStream.close()
|
||||
e.close()
|
||||
eventBus.publish(new ConnectionEvent(endpoint: e, incoming: true, leaf: leaf, status: ConnectionAttemptStatus.REJECTED))
|
||||
}
|
||||
|
||||
@@ -31,25 +31,48 @@ class SearchIndex {
|
||||
}
|
||||
}
|
||||
|
||||
private static String[] split(String source) {
|
||||
source = source.replaceAll(SplitPattern.SPLIT_PATTERN, " ").toLowerCase()
|
||||
String [] split = source.split(" ")
|
||||
/**
 * Derives the keywords under which a string is indexed.
 *
 * The result contains, in this order: the lower-cased tokens obtained by
 * replacing every SplitPattern.SPLIT_PATTERN character with a space, the
 * lower-cased space-separated tokens with punctuation intact, and finally
 * the unmodified source string. Empty tokens are skipped; duplicates are
 * not removed.
 *
 * @param source the string being indexed
 * @return the keywords for source
 */
private static String[] split(final String source) {
    def rv = []

    // first split by split pattern
    String sourceSplit = source.replaceAll(SplitPattern.SPLIT_PATTERN, " ").toLowerCase()
    sourceSplit.split(" ").each { if (it.length() > 0) rv << it }

    // then just by ' '; lower-cased because search() lower-cases every query
    // term before looking it up, so a mixed-case keyword could never match
    source.toLowerCase().split(' ').each { if (it.length() > 0) rv << it }

    // and add original string
    rv << source
    rv.toArray(new String[0])
}
|
||||
|
||||
String[] search(List<String> terms) {
|
||||
Set<String> rv = null;
|
||||
|
||||
Set<String> powerSet = new HashSet<>()
|
||||
terms.each {
|
||||
powerSet.addAll(it.toLowerCase().split(' '))
|
||||
}
|
||||
|
||||
powerSet.each {
|
||||
Set<String> forWord = keywords.getOrDefault(it,[])
|
||||
if (rv == null) {
|
||||
rv = new HashSet<>(forWord)
|
||||
} else {
|
||||
rv.retainAll(forWord)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// now, filter by terms
|
||||
for (Iterator<String> iter = rv.iterator(); iter.hasNext();) {
|
||||
String candidate = iter.next()
|
||||
candidate = candidate.toLowerCase()
|
||||
boolean keep = true
|
||||
terms.each {
|
||||
keep &= candidate.contains(it)
|
||||
}
|
||||
if (!keep)
|
||||
iter.remove()
|
||||
}
|
||||
|
||||
if (rv != null)
|
||||
|
||||
27
core/src/test/groovy/com/muwire/core/SplitPatternTest.groovy
Normal file
27
core/src/test/groovy/com/muwire/core/SplitPatternTest.groovy
Normal file
@@ -0,0 +1,27 @@
|
||||
package com.muwire.core
|
||||
|
||||
import org.junit.Test
|
||||
|
||||
/**
 * Exercises SplitPattern.termify on plain keywords, quoted phrases and an unbalanced quote.
 */
class SplitPatternTest {

    /** Separator characters such as '_' and '.' delimit individual terms. */
    @Test
    void testReplaceCharacters() {
        def terms = SplitPattern.termify("a_b.c")
        assert terms == ['a','b','c']
    }

    /** A fully quoted string is returned as one term with its space kept. */
    @Test
    void testPhrase() {
        def terms = SplitPattern.termify('"siamese cat"')
        assert terms == ['siamese cat']
    }

    /** An opening quote without a closing one falls back to keyword splitting. */
    @Test
    void testInvalidPhrase() {
        def terms = SplitPattern.termify('"siamese cat')
        assert terms == ['siamese', 'cat']
    }

    /** Quoted phrases and bare keywords can be mixed in a single query. */
    @Test
    void testManyPhrases() {
        def expected = ['siamese cat','any','cat','persian cat']
        def terms = SplitPattern.termify('"siamese cat" any cat "persian cat"')
        assert terms == expected
    }
}
|
||||
@@ -95,7 +95,7 @@ class ConnectionAcceptorTest {
|
||||
connectionEstablisher = connectionEstablisherMock.proxyInstance()
|
||||
|
||||
acceptor = new ConnectionAcceptor(eventBus, connectionManager, settings, i2pAcceptor,
|
||||
hostCache, trustService, searchManager, uploadManager, null, connectionEstablisher)
|
||||
hostCache, trustService, searchManager, uploadManager, null, connectionEstablisher, null)
|
||||
acceptor.start()
|
||||
Thread.sleep(100)
|
||||
}
|
||||
|
||||
@@ -149,7 +149,7 @@ class FileManagerTest {
|
||||
manager.onFileLoadedEvent new FileLoadedEvent(loadedFile : sf1)
|
||||
manager.onFileLoadedEvent new FileLoadedEvent(loadedFile : sf2)
|
||||
|
||||
manager.onFileUnsharedEvent new FileUnsharedEvent(unsharedFile: sf2)
|
||||
manager.onFileUnsharedEvent new FileUnsharedEvent(deleted : true, unsharedFile: sf2)
|
||||
|
||||
manager.onSearchEvent new SearchEvent(searchHash : ih.getRoot())
|
||||
Thread.sleep(20)
|
||||
@@ -170,7 +170,7 @@ class FileManagerTest {
|
||||
SharedFile sf2 = new SharedFile(f2, ih2, 0)
|
||||
manager.onFileLoadedEvent new FileLoadedEvent(loadedFile: sf2)
|
||||
|
||||
manager.onFileUnsharedEvent new FileUnsharedEvent(unsharedFile: sf2)
|
||||
manager.onFileUnsharedEvent new FileUnsharedEvent(deleted : true, unsharedFile: sf2)
|
||||
|
||||
// 1 match left
|
||||
manager.onSearchEvent new SearchEvent(searchTerms: ["c"])
|
||||
|
||||
@@ -90,4 +90,34 @@ class SearchIndexTest {
|
||||
def found = index.search(["muwire", "0", "3", "jar"])
|
||||
assert found.size() == 1
|
||||
}
|
||||
|
||||
/** Each space-separated token is searchable with its punctuation intact. */
@Test
void testOriginalText() {
    initIndex(["a-b c-d"])
    ['a-b', 'c-d'].each { term ->
        def found = index.search([term])
        assert found.size() == 1
    }
}
|
||||
|
||||
/** A multi-word term matches only when its words are adjacent in the indexed string. */
@Test
void testPhrase() {
    initIndex(["a-b c-d e-f"])

    def leading = index.search(['a-b c-d'])
    assert leading.size() == 1

    def trailing = index.search(['c-d e-f'])
    assert trailing.size() == 1

    // 'a-b' and 'e-f' are both present but not next to each other
    def nonAdjacent = index.search(['a-b e-f'])
    assert nonAdjacent.size() == 0
}
|
||||
|
||||
/** Phrases and single keywords in one query must all match the same entry. */
@Test
void testMixedPhraseAndKeyword() {
    def names = ["My siamese cat video",
                 "My cat video of a siamese",
                 "Video of a siamese cat"]
    initIndex(names)

    // number of entries returned for the given list of terms
    def hits = { List<String> terms -> index.search(terms).size() }

    assert hits(['cat video']) == 2
    assert hits(['cat video','siamese']) == 2
    assert hits(['cat', 'video siamese']) == 0
    assert hits(['cat','video','siamese']) == 3
}
|
||||
}
|
||||
|
||||
@@ -107,11 +107,7 @@ class MainFrameController {
|
||||
searchEvent = new SearchEvent(searchHash : root, uuid : uuid, oobInfohash: true, compressedResults : true)
|
||||
payload = root
|
||||
} else {
|
||||
// this can be improved a lot
|
||||
def replaced = search.toLowerCase().trim().replaceAll(SplitPattern.SPLIT_PATTERN, " ")
|
||||
def terms = replaced.split(" ")
|
||||
def nonEmpty = []
|
||||
terms.each { if (it.length() > 0) nonEmpty << it }
|
||||
def nonEmpty = SplitPattern.termify(search)
|
||||
payload = String.join(" ",nonEmpty).getBytes(StandardCharsets.UTF_8)
|
||||
searchEvent = new SearchEvent(searchTerms : nonEmpty, uuid : uuid, oobInfohash: true,
|
||||
searchComments : core.muOptions.searchComments, compressedResults : true)
|
||||
|
||||
@@ -33,7 +33,7 @@ class CertificateWarningView {
|
||||
label(text : "Even if you delete the certificate from your disk, others may already have it.", constraints : gbc(gridx:0, gridy : 1, gridwidth: 2))
|
||||
label(text : "If you are sure you want to do this, check the checkbox below, then click \"Certify\" again.", constraints : gbc(gridx:0, gridy: 2, gridwidth:2))
|
||||
label(text : "\n", constraints : gbc(gridx:0, gridy:3)) // TODO: real padding
|
||||
label(text : "I understand, do not show this warning again", constraints : gbc(gridx:0, gridy:4, anchor : GridBagConstraints.LINE_END))
|
||||
label(text : " I understand, do not show this warning again", constraints : gbc(gridx:0, gridy:4, anchor : GridBagConstraints.LINE_END))
|
||||
checkbox = checkBox(constraints : gbc(gridx:1, gridy:4, anchor : GridBagConstraints.LINE_START))
|
||||
panel (constraints : gbc(gridx :0, gridy : 5, gridwidth : 2)) {
|
||||
button(text : "Ok", dismissAction)
|
||||
|
||||
Reference in New Issue
Block a user