Skip to content

Commit 777c42e

Browse files
committed
GEODE-11: let the query use the index's analyzer; add tests for a customized analyzer and an analyzer per field
1 parent 46eeb39 commit 777c42e

10 files changed

Lines changed: 139 additions & 23 deletions

File tree

geode-lucene/src/main/java/com/gemstone/gemfire/cache/lucene/internal/StringQueryProvider.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,13 @@ public synchronized Query getQuery(LuceneIndex index) throws QueryException {
6565
if (luceneQuery == null) {
6666
String[] fields = index.getFieldNames();
6767

68-
//TODO get the analyzer from the index
69-
MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new StandardAnalyzer());
68+
LuceneIndexImpl indexImpl = (LuceneIndexImpl)index;
69+
MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, indexImpl.getAnalyzer());
7070
try {
7171
luceneQuery = parser.parse(query);
72+
if (logger.isDebugEnabled()) {
73+
logger.debug("User query "+query+" is parsed to be: "+luceneQuery);
74+
}
7275
} catch (ParseException e) {
7376
logger.debug("Malformed lucene query: " + query, e);
7477
throw new QueryException(e);

geode-lucene/src/main/java/com/gemstone/gemfire/cache/lucene/internal/repository/IndexRepositoryImpl.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@
2020
package com.gemstone.gemfire.cache.lucene.internal.repository;
2121

2222
import java.io.IOException;
23+
import java.util.Iterator;
2324

25+
import org.apache.logging.log4j.Logger;
2426
import org.apache.lucene.document.Document;
2527
import org.apache.lucene.index.IndexWriter;
2628
import org.apache.lucene.index.Term;
@@ -33,6 +35,7 @@
3335
import com.gemstone.gemfire.cache.Region;
3436
import com.gemstone.gemfire.cache.lucene.internal.repository.serializer.LuceneSerializer;
3537
import com.gemstone.gemfire.cache.lucene.internal.repository.serializer.SerializerUtil;
38+
import com.gemstone.gemfire.internal.logging.LogService;
3639

3740
/**
3841
* A repository that writes to a single lucene index writer
@@ -48,6 +51,8 @@ public class IndexRepositoryImpl implements IndexRepository {
4851
private final SearcherManager searcherManager;
4952
private Region<?,?> region;
5053

54+
private static final Logger logger = LogService.getLogger();
55+
5156
public IndexRepositoryImpl(Region<?,?> region, IndexWriter writer, LuceneSerializer serializer) throws IOException {
5257
this.region = region;
5358
this.writer = writer;
@@ -85,6 +90,9 @@ public void query(Query query, int limit, IndexResultCollector collector) throws
8590
for(ScoreDoc scoreDoc : docs.scoreDocs) {
8691
Document doc = searcher.doc(scoreDoc.doc);
8792
Object key = SerializerUtil.getKey(doc);
93+
if (logger.isDebugEnabled()) {
94+
logger.debug("query found doc:"+doc+":"+scoreDoc);
95+
}
8896
collector.collect(key, scoreDoc.score);
8997
}
9098
} finally {

geode-lucene/src/main/java/com/gemstone/gemfire/cache/lucene/internal/repository/serializer/HeterogeneousLuceneSerializer.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,10 @@
2020

2121
import java.util.Map;
2222

23+
import org.apache.logging.log4j.Logger;
2324
import org.apache.lucene.document.Document;
2425

26+
import com.gemstone.gemfire.internal.logging.LogService;
2527
import com.gemstone.gemfire.internal.util.concurrent.CopyOnWriteWeakHashMap;
2628
import com.gemstone.gemfire.pdx.PdxInstance;
2729

@@ -48,6 +50,8 @@ public class HeterogeneousLuceneSerializer implements LuceneSerializer {
4850
*/
4951
private Map<Class<?>, LuceneSerializer> mappers = new CopyOnWriteWeakHashMap<Class<?>, LuceneSerializer>();
5052

53+
private static final Logger logger = LogService.getLogger();
54+
5155
public HeterogeneousLuceneSerializer(String[] indexedFields) {
5256
this.indexedFields = indexedFields;
5357
pdxMapper = new PdxLuceneSerializer(indexedFields);
@@ -59,6 +63,9 @@ public void toDocument(Object value, Document doc) {
5963
LuceneSerializer mapper = getFieldMapper(value);
6064

6165
mapper.toDocument(value, doc);
66+
if (logger.isDebugEnabled()) {
67+
logger.debug("HeterogeneousLuceneSerializer.toDocument:"+doc);
68+
}
6269
}
6370

6471
/**

geode-lucene/src/main/java/com/gemstone/gemfire/cache/lucene/internal/repository/serializer/PdxLuceneSerializer.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,10 @@
1919

2020
package com.gemstone.gemfire.cache.lucene.internal.repository.serializer;
2121

22+
import org.apache.logging.log4j.Logger;
2223
import org.apache.lucene.document.Document;
2324

25+
import com.gemstone.gemfire.internal.logging.LogService;
2426
import com.gemstone.gemfire.pdx.PdxInstance;
2527

2628
/**
@@ -30,6 +32,8 @@ class PdxLuceneSerializer implements LuceneSerializer {
3032

3133
private String[] indexedFields;
3234

35+
private static final Logger logger = LogService.getLogger();
36+
3337
public PdxLuceneSerializer(String[] indexedFields) {
3438
this.indexedFields = indexedFields;
3539
}
@@ -43,5 +47,8 @@ public void toDocument(Object value, Document doc) {
4347
SerializerUtil.addField(doc, field, fieldValue);
4448
}
4549
}
50+
if (logger.isDebugEnabled()) {
51+
logger.debug("PdxLuceneSerializer.toDocument:"+doc);
52+
}
4653
}
4754
}

geode-lucene/src/main/java/com/gemstone/gemfire/cache/lucene/internal/repository/serializer/ReflectionLuceneSerializer.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,11 @@
2525
import java.util.HashSet;
2626
import java.util.Set;
2727

28+
import org.apache.logging.log4j.Logger;
2829
import org.apache.lucene.document.Document;
2930

31+
import com.gemstone.gemfire.internal.logging.LogService;
32+
3033
/**
3134
* A lucene serializer that handles a single class and can
3235
* map an instance of that class to a document using reflection.
@@ -35,6 +38,8 @@ class ReflectionLuceneSerializer implements LuceneSerializer {
3538

3639
private Field[] fields;
3740

41+
private static final Logger logger = LogService.getLogger();
42+
3843
public ReflectionLuceneSerializer(Class<? extends Object> clazz,
3944
String[] indexedFields) {
4045
Set<String> fieldSet = new HashSet<String>();
@@ -70,5 +75,8 @@ public void toDocument(Object value, Document doc) {
7075
//TODO - what to do if we can't read a field?
7176
}
7277
}
78+
if (logger.isDebugEnabled()) {
79+
logger.debug("ReflectionLuceneSerializer.toDocument:"+doc);
80+
}
7381
}
7482
}

geode-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/LuceneIntegrationTest.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ public void createCache() {
6161
protected CacheFactory getCacheFactory() {
6262
CacheFactory cf = new CacheFactory();
6363
cf.set("mcast-port", "0");
64+
cf.set("log-level", System.getProperty("logLevel", "info"));
6465
cf.set("locators", "");
6566
return cf;
6667
}

geode-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/LuceneQueriesIntegrationTest.java

Lines changed: 88 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,28 +18,31 @@
1818

1919
import static com.gemstone.gemfire.cache.lucene.test.LuceneTestUtilities.verifyQueryKeys;
2020
import static org.hamcrest.Matchers.isA;
21-
import static org.junit.Assert.*;
21+
import static org.junit.Assert.assertEquals;
2222

2323
import java.util.HashMap;
2424
import java.util.Map;
2525

26-
import com.gemstone.gemfire.cache.Region;
27-
import com.gemstone.gemfire.cache.RegionShortcut;
28-
import com.gemstone.gemfire.cache.execute.FunctionException;
29-
import com.gemstone.gemfire.cache.lucene.test.LuceneTestUtilities;
30-
import com.gemstone.gemfire.cache.lucene.test.TestObject;
31-
import com.gemstone.gemfire.cache.query.QueryException;
32-
import com.gemstone.gemfire.test.junit.categories.IntegrationTest;
33-
3426
import org.apache.lucene.analysis.Analyzer;
27+
import org.apache.lucene.analysis.TokenStream;
28+
import org.apache.lucene.analysis.Tokenizer;
3529
import org.apache.lucene.analysis.core.KeywordAnalyzer;
30+
import org.apache.lucene.analysis.core.LowerCaseFilter;
3631
import org.apache.lucene.analysis.standard.StandardAnalyzer;
32+
import org.apache.lucene.analysis.util.CharTokenizer;
3733
import org.apache.lucene.queryparser.classic.ParseException;
3834
import org.junit.Rule;
3935
import org.junit.Test;
4036
import org.junit.experimental.categories.Category;
4137
import org.junit.rules.ExpectedException;
4238

39+
import com.gemstone.gemfire.cache.Region;
40+
import com.gemstone.gemfire.cache.RegionShortcut;
41+
import com.gemstone.gemfire.cache.execute.FunctionException;
42+
import com.gemstone.gemfire.cache.lucene.test.TestObject;
43+
import com.gemstone.gemfire.cache.query.QueryException;
44+
import com.gemstone.gemfire.test.junit.categories.IntegrationTest;
45+
4346
/**
4447
* This class contains tests of lucene queries that can fit
4548
*/
@@ -62,21 +65,76 @@ public void shouldNotTokenizeWordsWithKeywordAnalyzer() throws ParseException {
6265

6366
//Put two values with some of the same tokens
6467
String value1 = "one three";
65-
region.put("A", new TestObject(value1, value1));
6668
String value2 = "one two three";
69+
String value3 = "one@three";
70+
region.put("A", new TestObject(value1, value1));
6771
region.put("B", new TestObject(value2, value2));
72+
region.put("C", new TestObject(value3, value3));
6873

74+
// The value will be tokenized into following documents using the analyzers:
75+
// <field1:one three> <field2:one three>
76+
// <field1:one two three> <field2:one two three>
77+
// <field1:one@three> <field2:one@three>
78+
6979
index.waitUntilFlushed(60000);
7080

71-
//Using the standard analyzer, this query will match both results
72-
verifyQuery("field1:\"one three\"", "A", "B");
73-
74-
//Using the keyword analyzer, this query will only match the entry that exactly matches
81+
// standard analyzer with double quote
82+
// this query string will be parsed as "one three"
83+
// but standard analyzer will parse value "one@three" to be "one three"
84+
// query will be--fields1:"one three"
85+
// so C will be hit by query
86+
verifyQuery("field1:\"one three\"", "A", "C");
87+
88+
// standard analyzer will not tokenize by '_'
89+
// this query string will be parsed as "one_three"
90+
// query will be--field1:one_three
91+
verifyQuery("field1:one_three");
92+
93+
// standard analyzer will tokenize by '@'
94+
// this query string will be parsed as "one" "three"
95+
// query will be--field1:one field1:three
96+
verifyQuery("field1:one@three", "A", "B", "C");
97+
98+
// keyword analyzer, this query will only match the entry that exactly matches
99+
// this query string will be parsed as "one three"
100+
// but keyword analyzer will parse one@three to be "one three"
101+
// query will be--field2:one three
75102
verifyQuery("field2:\"one three\"", "A");
76103

77-
104+
// keyword analyzer without double quote. It should be the same as
105+
// with double quote
106+
// query will be--field2:one@three
107+
verifyQuery("field2:one@three", "C");
78108
}
79109

110+
@Test()
111+
public void shouldTokenizeUsingMyCharacterAnalyser() throws ParseException {
112+
Map<String, Analyzer> fields = new HashMap<String, Analyzer>();
113+
// not to specify field1's analyzer, it should use standard analyzer
114+
// Note: fields has to contain "field1", otherwise, field1 will not be tokenized
115+
fields.put("field1", null);
116+
fields.put("field2", new MyCharacterAnalyzer());
117+
luceneService.createIndex(INDEX_NAME, REGION_NAME, fields);
118+
Region region = cache.createRegionFactory(RegionShortcut.PARTITION)
119+
.create(REGION_NAME);
120+
final LuceneIndex index = luceneService.getIndex(INDEX_NAME, REGION_NAME);
121+
122+
//Put two values with some of the same tokens
123+
String value1 = "one three";
124+
String value4 = "two_four";
125+
String value3 = "two@four";
126+
region.put("A", new TestObject(value1, value4));
127+
region.put("B", new TestObject(value1, value3));
128+
region.put("C", new TestObject(value3, value3));
129+
region.put("D", new TestObject(value4, value4));
130+
131+
index.waitUntilFlushed(60000);
132+
133+
verifyQuery("field1:one AND field2:two_four", "A");
134+
verifyQuery("field1:one AND field2:two", "A");
135+
verifyQuery("field1:three AND field2:four", "A");
136+
}
137+
80138
@Test()
81139
public void throwFunctionExceptionWhenGivenBadQuery() {
82140
LuceneService luceneService = LuceneServiceProvider.get(cache);
@@ -109,5 +167,20 @@ private void verifyQuery(String query, String ... expectedKeys) throws ParseExce
109167
verifyQueryKeys(queryWithStandardAnalyzer, expectedKeys);
110168
}
111169

170+
private static class MyCharacterTokenizer extends CharTokenizer {
171+
@Override
172+
protected boolean isTokenChar(final int character) {
173+
return '_' != character;
174+
}
175+
}
176+
177+
private static class MyCharacterAnalyzer extends Analyzer {
178+
@Override
179+
protected TokenStreamComponents createComponents(final String field) {
180+
Tokenizer tokenizer = new MyCharacterTokenizer();
181+
TokenStream filter = new LowerCaseFilter(tokenizer);
182+
return new TokenStreamComponents(tokenizer, filter);
183+
}
184+
}
112185

113186
}

geode-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/StringQueryProviderJUnitTest.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020

2121
import static org.junit.Assert.assertEquals;
2222

23+
import org.apache.lucene.analysis.Analyzer;
24+
import org.apache.lucene.analysis.standard.StandardAnalyzer;
2325
import org.apache.lucene.search.Query;
2426
import org.junit.Assert;
2527
import org.junit.Before;
@@ -36,12 +38,14 @@
3638
@Category(UnitTest.class)
3739
public class StringQueryProviderJUnitTest {
3840

39-
private LuceneIndex mockIndex;
41+
private LuceneIndexImpl mockIndex;
4042

4143
@Before
4244
public void initMocksAndCommonObjects() {
43-
mockIndex = Mockito.mock(LuceneIndex.class, "mockIndex");
45+
mockIndex = Mockito.mock(LuceneIndexImpl.class, "mockIndex");
4446
String[] fields = { "field-1", "field-2" };
47+
Analyzer analyzer = new StandardAnalyzer();
48+
Mockito.doReturn(analyzer).when(mockIndex).getAnalyzer();
4549
Mockito.doReturn(fields).when(mockIndex).getFieldNames();
4650
Mockito.doReturn("mockIndex").when(mockIndex).getName();
4751
Mockito.doReturn("mockRegionPath").when(mockIndex).getRegionPath();

geode-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/internal/distributed/LuceneFunctionJUnitTest.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import com.gemstone.gemfire.cache.lucene.LuceneQueryProvider;
3535
import com.gemstone.gemfire.cache.lucene.internal.InternalLuceneIndex;
3636
import com.gemstone.gemfire.cache.lucene.internal.InternalLuceneService;
37+
import com.gemstone.gemfire.cache.lucene.internal.LuceneIndexImpl;
3738
import com.gemstone.gemfire.cache.lucene.internal.StringQueryProvider;
3839
import com.gemstone.gemfire.cache.lucene.internal.repository.IndexRepository;
3940
import com.gemstone.gemfire.cache.lucene.internal.repository.IndexResultCollector;
@@ -44,11 +45,14 @@
4445
import com.gemstone.gemfire.internal.cache.execute.InternalRegionFunctionContext;
4546
import com.gemstone.gemfire.test.junit.categories.UnitTest;
4647

48+
import org.apache.lucene.analysis.Analyzer;
49+
import org.apache.lucene.analysis.standard.StandardAnalyzer;
4750
import org.apache.lucene.search.Query;
4851
import org.junit.Before;
4952
import org.junit.Test;
5053
import org.junit.experimental.categories.Category;
5154
import org.mockito.ArgumentCaptor;
55+
import org.mockito.Mockito;
5256

5357
@Category(UnitTest.class)
5458
public class LuceneFunctionJUnitTest {
@@ -70,7 +74,7 @@ public class LuceneFunctionJUnitTest {
7074
IndexRepository mockRepository2;
7175
IndexResultCollector mockCollector;
7276
InternalLuceneService mockService;
73-
InternalLuceneIndex mockIndex;
77+
LuceneIndexImpl mockIndex;
7478

7579
ArrayList<IndexRepository> repos;
7680
LuceneFunctionContext<IndexResultCollector> searchArgs;
@@ -263,10 +267,11 @@ public void createMocksAndCommonObjects() throws Exception {
263267
repos.add(mockRepository1);
264268
repos.add(mockRepository2);
265269

266-
mockIndex = mock(InternalLuceneIndex.class);
270+
mockIndex = mock(LuceneIndexImpl.class);
267271
mockService = mock(InternalLuceneService.class);
268272
mockCache = mock(InternalCache.class);
269-
273+
Analyzer analyzer = new StandardAnalyzer();
274+
Mockito.doReturn(analyzer).when(mockIndex).getAnalyzer();
270275
queryProvider = new StringQueryProvider("gemfire:lucene");
271276

272277
searchArgs = new LuceneFunctionContext<IndexResultCollector>(queryProvider, "indexName");

geode-lucene/src/test/java/com/gemstone/gemfire/cache/lucene/test/LuceneTestUtilities.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ public static void verifyIndexFinishFlushing(Cache cache, String indexName, Stri
7575
*/
7676
public static <K> void verifyQueryKeys(LuceneQuery<K,Object> query,K ... expectedKeys) {
7777
Set<K> expectedKeySet = new HashSet<>(Arrays.asList(expectedKeys));
78-
Set<K> actualKeySet = new HashSet<>(Arrays.asList(expectedKeys));
78+
Set<K> actualKeySet = new HashSet<>();
7979
final LuceneQueryResults<K, Object> results = query.search();
8080
while(results.hasNextPage()) {
8181
results.getNextPage().stream()

0 commit comments

Comments (0)