import pytest
import redis
import bz2
import csv
import time
import os
from io import TextIOWrapper
from .conftest import skip_ifmodversion_lt, default_redismod_url
from redis import Redis
import redis.commands.search
from redis.commands.json.path import Path
from redis.commands.search import Search
from redis.commands.search.field import (
GeoField,
NumericField,
TagField,
TextField
)
from redis.commands.search.query import (
GeoFilter,
NumericFilter,
Query
)
from redis.commands.search.result import Result
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
from redis.commands.search.suggestion import Suggestion
import redis.commands.search.aggregation as aggregations
import redis.commands.search.reducers as reducers
# Absolute paths of the data files kept in the "testdata" directory that
# sits beside this module.
WILL_PLAY_TEXT = os.path.abspath(
    os.path.join(
        os.path.dirname(__file__), "testdata", "will_play_text.csv.bz2"
    )
)
TITLES_CSV = os.path.abspath(
    os.path.join(os.path.dirname(__file__), "testdata", "titles.csv")
)
def waitForIndex(env, idx, timeout=None):
    """
    Block until the index ``idx`` no longer reports ongoing indexing.

    ``ft.info`` is issued through ``env.execute_command`` and the reply is
    polled every 0.1 seconds. Polling stops as soon as one of these holds:

    * the reply contains no ``indexing`` entry,
    * the value following the ``indexing`` entry is 0,
    * ``timeout`` (in seconds) has been used up.

    :param env: object exposing ``execute_command``
    :param idx: name of the index to inspect
    :param timeout: maximum time to wait; ``None`` waits indefinitely

    Nothing is returned, and an expired timeout does not raise.
    """
    delay = 0.1
    while True:
        res = env.execute_command("ft.info", idx)

        # Locate the flag once per reply. The bytes spelling is accepted too
        # so replies from a client without decode_responses are understood.
        position = None
        for flag in ("indexing", b"indexing"):
            try:
                position = res.index(flag)
            except ValueError:
                continue
            break
        if position is None:
            # no "indexing" entry at all: nothing to wait for
            return
        if int(res[position + 1]) == 0:
            return

        time.sleep(delay)
        if timeout is not None:
            timeout -= delay
            if timeout <= 0:
                return
def getClient():
    """
    Return a new Redis client built from ``default_redismod_url`` with
    response decoding switched on.
    """
    return Redis.from_url(default_redismod_url, decode_responses=True)
def createIndex(client, num_docs=100, definition=None):
    """
    Create the play/txt/chapter index on ``client`` and fill it with
    documents assembled from the compressed ``WILL_PLAY_TEXT`` CSV file.

    :param client: search client exposing ``create_index``, ``dropindex``
        and ``batch_indexer``
    :param num_docs: reading stops once this many distinct
        ``play:chapter`` keys have been collected
    :param definition: optional index definition handed to ``create_index``

    When ``create_index`` raises ``redis.ResponseError`` the index is
    dropped together with its documents and the whole procedure is retried.
    """
    try:
        client.create_index(
            (
                TextField("play", weight=5.0),
                TextField("txt"),
                NumericField("chapter"),
            ),
            definition=definition,
        )
    except redis.ResponseError:
        client.dropindex(delete_documents=True)
        return createIndex(client, num_docs=num_docs, definition=definition)

    chapters = {}
    # The context manager closes the text wrapper (and the bz2 file below it)
    # even though the loop usually stops before the end of the file.
    with TextIOWrapper(bz2.BZ2File(WILL_PLAY_TEXT), encoding="utf8") as bzfp:
        for line in csv.reader(bzfp, delimiter=";"):
            play, chapter, text = line[1], line[2], line[5]

            # rows sharing play and chapter are merged into one document
            key = "{}:{}".format(play, chapter).lower()
            d = chapters.setdefault(key, {})
            d["play"] = play
            d["txt"] = d.get("txt", "") + " " + text
            d["chapter"] = int(chapter or 0)
            if len(chapters) == num_docs:
                break

    indexer = client.batch_indexer(chunk_size=50)
    assert isinstance(indexer, Search.BatchIndexer)
    assert 50 == indexer.chunk_size

    for key, doc in chapters.items():
        indexer.add_document(key, **doc)
    indexer.commit()
# override the default module client, search requires both db=0, and text
@pytest.fixture
def modclient():
    """Fixture: a client on db 0 whose responses are decoded to text."""
    connection = Redis.from_url(
        default_redismod_url, db=0, decode_responses=True
    )
    return connection
@pytest.fixture
def client(modclient):
    """Fixture: ``modclient`` handed out after ``flushdb`` was called on it."""
    # every test using this fixture starts after a flush of the database
    modclient.flushdb()
    return modclient
@pytest.mark.redismod
def test_client(client):
    """
    Walk through the basic search API on an index of 500 play chapters:
    info, plain search, no-content, verbatim, field limits, document
    loading, id limits, slop/in-order and document deletion.
    """
    num_docs = 500
    ft = client.ft()
    createIndex(ft, num_docs=num_docs)
    waitForIndex(client, "idx")

    # verify info
    info = ft.info()
    expected_info_keys = (
        "index_name",
        "index_options",
        "attributes",
        "num_docs",
        "max_doc_id",
        "num_terms",
        "num_records",
        "inverted_sz_mb",
        "offset_vectors_sz_mb",
        "doc_table_size_mb",
        "key_table_size_mb",
        "records_per_doc_avg",
        "bytes_per_record_avg",
        "offsets_per_term_avg",
        "offset_bits_per_record_avg",
    )
    for key in expected_info_keys:
        assert key in info

    assert ft.index_name == info["index_name"]
    assert num_docs == int(info["num_docs"])

    res = ft.search("henry iv")
    assert isinstance(res, Result)
    assert 225 == res.total
    assert 10 == len(res.docs)
    assert res.duration > 0

    for hit in res.docs:
        assert hit.id
        assert hit.play == "Henry IV"
        assert len(hit.txt) > 0

    # test no content
    res = ft.search(Query("king").no_content())
    assert 194 == res.total
    assert 10 == len(res.docs)
    for hit in res.docs:
        assert "txt" not in hit.__dict__
        assert "play" not in hit.__dict__

    # test verbatim vs no verbatim
    total = ft.search(Query("kings").no_content()).total
    vtotal = ft.search(Query("kings").no_content().verbatim()).total
    assert total > vtotal

    # test in fields
    def henry_total(*fields):
        limited = Query("henry").no_content().limit_fields(*fields)
        return ft.search(limited).total

    txt_total = henry_total("txt")
    play_total = henry_total("play")
    both_total = henry_total("play", "txt")
    assert 129 == txt_total
    assert 494 == play_total
    assert 494 == both_total

    # test load_document
    loaded = ft.load_document("henry vi part 3:62")
    assert loaded is not None
    assert "henry vi part 3:62" == loaded.id
    assert loaded.play == "Henry VI Part 3"
    assert len(loaded.txt) > 0

    # test in-keys
    ids = [x.id for x in ft.search(Query("henry")).docs]
    assert 10 == len(ids)
    subset = ids[:5]
    limited_res = ft.search(Query("henry").limit_ids(*subset))
    assert len(subset) == limited_res.total
    ids = [x.id for x in limited_res.docs]
    assert set(ids) == set(subset)

    # test slop and in order
    assert 193 == ft.search(Query("henry king")).total
    assert 3 == ft.search(Query("henry king").slop(0).in_order()).total
    assert 52 == ft.search(Query("king henry").slop(0).in_order()).total
    assert 53 == ft.search(Query("henry king").slop(0)).total
    assert 167 == ft.search(Query("henry king").slop(100)).total

    # test delete document
    doc_id = "doc-5ghs2"
    salesman = Query("death of a salesman")
    ft.add_document(doc_id, play="Death of a Salesman")
    res = ft.search(salesman)
    assert 1 == res.total

    assert 1 == ft.delete_document(doc_id)
    res = ft.search(salesman)
    assert 0 == res.total
    # deleting a second time reports that nothing was removed
    assert 0 == ft.delete_document(doc_id)

    ft.add_document(doc_id, play="Death of a Salesman")
    res = ft.search(salesman)
    assert 1 == res.total
    ft.delete_document(doc_id)
@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_payloads(client):
    """A payload stored with a document is returned by ``with_payloads``."""
    ft = client.ft()
    ft.create_index((TextField("txt"),))

    ft.add_document("doc1", payload="foo baz", txt="foo bar")
    ft.add_document("doc2", txt="foo bar")

    res = ft.search(Query("foo bar").with_payloads())
    hits = res.docs
    assert 2 == res.total
    assert "doc1" == hits[0].id
    assert "doc2" == hits[1].id
    assert "foo baz" == hits[0].payload
    # doc2 was added without a payload
    assert hits[1].payload is None
@pytest.mark.redismod
def test_scores(client):
    """``with_scores`` returns scores; doc2 is expected first with 3.0."""
    ft = client.ft()
    ft.create_index((TextField("txt"),))

    ft.add_document("doc1", txt="foo baz")
    ft.add_document("doc2", txt="foo bar")

    scored = Query("foo ~bar").with_scores()
    res = ft.search(scored)
    hits = res.docs
    assert 2 == res.total
    assert "doc2" == hits[0].id
    assert 3.0 == hits[0].score
    assert "doc1" == hits[1].id
    # todo: enable once new RS version is tagged
    # self.assertEqual(0.2, res.docs[1].score)
@pytest.mark.redismod
def test_replace(client):
    """Re-adding a document with ``replace=True`` swaps its content."""
    ft = client.ft()
    ft.create_index((TextField("txt"),))

    for doc_id in ("doc1", "doc2"):
        ft.add_document(doc_id, txt="foo bar")
    waitForIndex(client, "idx")

    before = ft.search("foo bar")
    assert 2 == before.total

    ft.add_document("doc1", replace=True, txt="this is a replaced doc")

    # only doc2 still matches the old text
    old_text = ft.search("foo bar")
    assert 1 == old_text.total
    assert "doc2" == old_text.docs[0].id

    new_text = ft.search("replaced doc")
    assert 1 == new_text.total
    assert "doc1" == new_text.docs[0].id
@pytest.mark.redismod
def test_stopwords(client):
    """Search on an index created with custom stopwords foo/bar/baz."""
    ft = client.ft()
    ft.create_index((TextField("txt"),), stopwords=["foo", "bar", "baz"])
    ft.add_document("doc1", txt="foo bar")
    ft.add_document("doc2", txt="hello world")
    waitForIndex(client, "idx")

    only_stopwords = Query("foo bar").no_content()
    mixed = Query("foo bar hello world").no_content()
    res1 = ft.search(only_stopwords)
    res2 = ft.search(mixed)
    assert 0 == res1.total
    assert 1 == res2.total
@pytest.mark.redismod
def test_filters(client):
    """Numeric and geo filters narrow the results of a text query."""
    ft = client.ft()
    schema = (TextField("txt"), NumericField("num"), GeoField("loc"))
    ft.create_index(schema)
    ft.add_document("doc1", txt="foo bar", num=3.141, loc="-0.441,51.458")
    ft.add_document("doc2", txt="foo baz", num=2, loc="-0.1,51.2")
    waitForIndex(client, "idx")

    # Test numerical filter
    up_to_two = NumericFilter("num", 0, 2)
    above_two = NumericFilter("num", 2, NumericFilter.INF, minExclusive=True)
    q1 = Query("foo").add_filter(up_to_two).no_content()
    q2 = Query("foo").add_filter(above_two).no_content()
    res1 = ft.search(q1)
    res2 = ft.search(q2)

    assert 1 == res1.total
    assert 1 == res2.total
    assert "doc2" == res1.docs[0].id
    assert "doc1" == res2.docs[0].id

    # Test geo filter
    q1 = Query("foo").add_filter(
        GeoFilter("loc", -0.44, 51.45, 10)
    ).no_content()
    q2 = Query("foo").add_filter(
        GeoFilter("loc", -0.44, 51.45, 100)
    ).no_content()
    res1 = ft.search(q1)
    res2 = ft.search(q2)

    assert 1 == res1.total
    assert 2 == res2.total
    assert "doc1" == res1.docs[0].id

    # Sort results, after RDB reload order may change
    found_ids = sorted([res2.docs[0].id, res2.docs[1].id])
    assert ["doc1", "doc2"] == found_ids
@pytest.mark.redismod
def test_payloads_with_no_content(client):
    """``with_payloads`` combined with ``no_content`` yields both docs."""
    ft = client.ft()
    ft.create_index((TextField("txt"),))
    ft.add_document("doc1", payload="foo baz", txt="foo bar")
    ft.add_document("doc2", payload="foo baz2", txt="foo bar")

    query = Query("foo bar").with_payloads().no_content()
    found = ft.search(query).docs
    assert 2 == len(found)
@pytest.mark.redismod
def test_sort_by(client):
    """Results follow the sortable numeric field, ascending or descending."""
    ft = client.ft()
    ft.create_index((TextField("txt"), NumericField("num", sortable=True)))
    ft.add_document("doc1", txt="foo bar", num=1)
    ft.add_document("doc2", txt="foo baz", num=2)
    ft.add_document("doc3", txt="foo qux", num=3)

    # Test sort
    ascending = Query("foo").sort_by("num", asc=True).no_content()
    descending = Query("foo").sort_by("num", asc=False).no_content()
    res1 = ft.search(ascending)
    res2 = ft.search(descending)

    assert 3 == res1.total
    for position, doc_id in ((0, "doc1"), (1, "doc2"), (2, "doc3")):
        assert doc_id == res1.docs[position].id

    assert 3 == res2.total
    for position, doc_id in ((2, "doc1"), (1, "doc2"), (0, "doc3")):
        assert doc_id == res2.docs[position].id
@pytest.mark.redismod
@skip_ifmodversion_lt("2.0.0", "search")
def test_drop_index():
    """
    Ensure dropping the index removes the hash when ``delete_documents``
    is true and leaves the hash in place when it is false.
    """
    idx = "HaveIt"
    # (delete_documents flag, expected content of the hash afterwards)
    cases = ((True, {}), (False, {"name": "haveit"}))

    for _ in range(20):
        for delete_documents, expected_hash in cases:
            index = getClient()
            index.hset("index:haveit", mapping={"name": "haveit"})
            idef = IndexDefinition(prefix=["index:"])
            index.ft(idx).create_index((TextField("name"),), definition=idef)
            waitForIndex(index, idx)

            index.ft(idx).dropindex(delete_documents=delete_documents)
            remaining = index.hgetall("index:haveit")
            assert remaining == expected_hash
@pytest.mark.redismod
def test_example(client):
    """Create an index, add one document and run a paged verbatim query."""
    ft = client.ft()

    # Creating the index definition and schema
    schema = (TextField("title", weight=5.0), TextField("body"))
    ft.create_index(schema)

    # Indexing a document
    ft.add_document(
        "doc1",
        title="RediSearch",
        body="Redisearch impements a search engine on top of redis",
    )

    # Searching with complex parameters:
    query = Query("search engine").verbatim().no_content().paging(0, 5)
    assert ft.search(query) is not None
@pytest.mark.redismod
def test_auto_complete(client):
    """
    Fill the "ac" suggestion dictionary from ``TITLES_CSV`` and check
    sugadd, suglen, sugget (plain, fuzzy, with payloads) and sugdel.
    """
    ft = client.ft()

    # sugadd is expected to return the running number of suggestions
    n = 0
    with open(TITLES_CSV) as f:
        for n, row in enumerate(csv.reader(f), start=1):
            term, score = row[0], float(row[1])
            assert n == ft.sugadd("ac", Suggestion(term, score=score))

    assert n == ft.suglen("ac")

    ret = ft.sugget("ac", "bad", with_scores=True)
    assert 2 == len(ret)
    for position, expected in enumerate(("badger", "badalte rishtey")):
        assert expected == ret[position].string
        assert isinstance(ret[position].score, float)
        assert 1.0 != ret[position].score

    ret = ft.sugget("ac", "bad", fuzzy=True, num=10)
    assert 10 == len(ret)
    assert 1.0 == ret[0].score

    strs = {x.string for x in ret}
    for sug in strs:
        assert 1 == ft.sugdel("ac", sug)

    # make sure a second delete returns 0
    for sug in strs:
        assert 0 == ft.sugdel("ac", sug)

    # make sure they were actually deleted
    after_delete = ft.sugget("ac", "bad", fuzzy=True, num=10)
    for sug in after_delete:
        assert sug.string not in strs

    # Test with payload
    for term, payload in (("pay1", "pl1"), ("pay2", "pl2"), ("pay3", "pl3")):
        ft.sugadd("ac", Suggestion(term, payload=payload))

    sugs = ft.sugget("ac", "pay", with_payloads=True, with_scores=True)
    assert 3 == len(sugs)
    for sug in sugs:
        assert sug.payload
        assert sug.payload.startswith("pl")
@pytest.mark.redismod
def test_no_index(client):
    """
    Fields created with ``no_index=True, sortable=True`` cannot be
    searched but can be sorted on; ``no_index`` without ``sortable``
    is rejected by the field constructors.
    """
    ft = client.ft()
    ft.create_index(
        (
            TextField("field"),
            TextField("text", no_index=True, sortable=True),
            NumericField("numeric", no_index=True, sortable=True),
            GeoField("geo", no_index=True, sortable=True),
            TagField("tag", no_index=True, sortable=True),
        )
    )

    ft.add_document(
        "doc1", field="aaa", text="1", numeric="1", geo="1,1", tag="1"
    )
    ft.add_document(
        "doc2", field="aab", text="2", numeric="2", geo="2,2", tag="2"
    )
    waitForIndex(client, "idx")

    assert 0 == ft.search(Query("@text:aa*")).total
    assert 2 == ft.search(Query("@field:aa*")).total

    descending = ft.search(Query("*").sort_by("text", asc=False))
    assert 2 == descending.total
    assert "doc2" == descending.docs[0].id

    # ascending order on every sortable field puts doc1 first
    for field_name in ("text", "numeric", "geo", "tag"):
        ascending = ft.search(Query("*").sort_by(field_name, asc=True))
        assert "doc1" == ascending.docs[0].id

    # Ensure exception is raised for non-indexable, non-sortable fields
    for field_cls in (TextField, NumericField, GeoField, TagField):
        with pytest.raises(Exception):
            field_cls("name", no_index=True, sortable=False)
@pytest.mark.redismod
def test_partial(client):
    """``partial=True`` keeps the other fields, ``replace=True`` does not."""
    ft = client.ft()
    ft.create_index((TextField("f1"), TextField("f2"), TextField("f3")))

    for doc_id in ("doc1", "doc2"):
        ft.add_document(doc_id, f1="f1_val", f2="f2_val")
    ft.add_document("doc1", f3="f3_val", partial=True)
    ft.add_document("doc2", f3="f3_val", replace=True)
    waitForIndex(client, "idx")

    # Search for f3 value. All documents should have it
    with_f3 = ft.search("@f3:f3_val")
    assert 2 == with_f3.total

    # Only the document updated with PARTIAL should still have f1 and f2 values
    with_all = ft.search("@f3:f3_val @f2:f2_val @f1:f1_val")
    assert 1 == with_all.total
@pytest.mark.redismod
def test_no_create(client):
    """
    ``no_create=True`` updates existing documents and raises
    ``redis.ResponseError`` for a document id that does not exist yet.
    """
    ft = client.ft()
    ft.create_index((TextField("f1"), TextField("f2"), TextField("f3")))

    for doc_id in ("doc1", "doc2"):
        ft.add_document(doc_id, f1="f1_val", f2="f2_val")
    ft.add_document("doc1", f3="f3_val", no_create=True)
    ft.add_document("doc2", f3="f3_val", no_create=True, partial=True)
    waitForIndex(client, "idx")

    # Search for f3 value. All documents should have it
    with_f3 = ft.search("@f3:f3_val")
    assert 2 == with_f3.total

    # Only the document updated with PARTIAL should still have f1 and f2 values
    with_all = ft.search("@f3:f3_val @f2:f2_val @f1:f1_val")
    assert 1 == with_all.total

    # doc3 was never added, so no_create must fail
    with pytest.raises(redis.ResponseError):
        ft.add_document("doc3", f2="f2_val", f3="f3_val", no_create=True)
@pytest.mark.redismod
def test_explain(client):
    """``explain`` returns a truthy value for a three-field query."""
    ft = client.ft()
    ft.create_index((TextField("f1"), TextField("f2"), TextField("f3")))
    explanation = ft.explain("@f3:f3_val @f2:f2_val @f1:f1_val")
    assert explanation
@pytest.mark.redismod
def test_explaincli(client):
    """``explain_cli`` is expected to raise ``NotImplementedError``."""
    ft = client.ft()
    with pytest.raises(NotImplementedError):
        ft.explain_cli("foo")
@pytest.mark.redismod
def test_summarize(client):
    """Check the summarized/highlighted fields of the top "king henry" hit."""
    createIndex(client.ft())
    waitForIndex(client, "idx")

    # explicit fields and empty highlight tags
    q = Query("king henry").paging(0, 1)
    q.highlight(fields=("play", "txt"), tags=("", ""))
    q.summarize("txt")

    hits = client.ft().search(q).docs
    doc = sorted(hits)[0]
    assert "Henry IV" == doc.play
    assert (
        "ACT I SCENE I. London. The palace. Enter KING HENRY, LORD JOHN OF LANCASTER, the EARL of WESTMORELAND, SIR... "  # noqa
        == doc.txt
    )

    # summarize and highlight called without arguments
    q = Query("king henry").paging(0, 1).summarize().highlight()

    hits = client.ft().search(q).docs
    doc = sorted(hits)[0]
    assert "Henry ... " == doc.play
    assert (
        "ACT I SCENE I. London. The palace. Enter KING HENRY, LORD JOHN OF LANCASTER, the EARL of WESTMORELAND, SIR... "  # noqa
        == doc.txt
    )
@pytest.mark.redismod
@skip_ifmodversion_lt("2.0.0", "search")
def test_alias():
    """
    Add, update and delete the alias "spaceballs" across two indexes
    that are defined on different key prefixes.
    """
    conn1 = getClient()
    conn2 = getClient()

    def1 = IndexDefinition(prefix=["index1:"])
    def2 = IndexDefinition(prefix=["index2:"])

    ftindex1 = conn1.ft("testAlias")
    ftindex2 = conn2.ft("testAlias2")
    ftindex1.create_index((TextField("name"),), definition=def1)
    ftindex2.create_index((TextField("name"),), definition=def2)

    conn1.hset("index1:lonestar", mapping={"name": "lonestar"})
    conn2.hset("index2:yogurt", mapping={"name": "yogurt"})

    first_hit = ftindex1.search("*").docs[0]
    assert "index1:lonestar" == first_hit.id

    # create alias and check for results
    ftindex1.aliasadd("spaceballs")
    alias_client = getClient().ft("spaceballs")
    first_hit = alias_client.search("*").docs[0]
    assert "index1:lonestar" == first_hit.id

    # Throw an exception when trying to add an alias that already exists
    with pytest.raises(Exception):
        ftindex2.aliasadd("spaceballs")

    # update alias and ensure new results
    ftindex2.aliasupdate("spaceballs")
    alias_client2 = getClient().ft("spaceballs")
    first_hit = alias_client2.search("*").docs[0]
    assert "index2:yogurt" == first_hit.id

    # once the alias is deleted, searching through it must fail
    ftindex2.aliasdel("spaceballs")
    with pytest.raises(Exception):
        alias_client2.search("*").docs[0]
@pytest.mark.redismod
def test_alias_basic():
    """
    Add, update and delete the alias "myalias" on two indexes that are
    created without an index definition.
    """
    # Creating a client with one index
    getClient().flushdb()
    index1 = getClient().ft("testAlias")
    index1.create_index((TextField("txt"),))
    index1.add_document("doc1", txt="text goes here")

    index2 = getClient().ft("testAlias2")
    index2.create_index((TextField("txt"),))
    index2.add_document("doc2", txt="text goes here")

    def by_id(document):
        return document.id

    # add the actual alias and check
    index1.aliasadd("myalias")
    alias_client = getClient().ft("myalias")
    ordered = sorted(alias_client.search("*").docs, key=by_id)
    assert "doc1" == ordered[0].id

    # Throw an exception when trying to add an alias that already exists
    with pytest.raises(Exception):
        index2.aliasadd("myalias")

    # update the alias; the lowest id in the sorted result is still doc1
    # NOTE(review): this does not show that the alias now points at
    # testAlias2 - confirm whether a doc2-specific check was intended
    index2.aliasupdate("myalias")
    alias_client2 = getClient().ft("myalias")
    ordered = sorted(alias_client2.search("*").docs, key=by_id)
    assert "doc1" == ordered[0].id

    # delete the alias and expect an error if we try to query again
    index2.aliasdel("myalias")
    with pytest.raises(Exception):
        _ = alias_client2.search("*").docs[0]
@pytest.mark.redismod
def test_tags(client):
    """
    Query a tag field with plain, multi-word and escaped tags, then
    compare ``tagvals`` with the tags that were indexed.
    """
    client.ft().create_index((TextField("txt"), TagField("tags")))
    tags = "foo,foo bar,hello;world"
    tags2 = "soba,ramen"

    client.ft().add_document("doc1", txt="fooz barz", tags=tags)
    client.ft().add_document("doc2", txt="noodles", tags=tags2)
    waitForIndex(client, "idx")

    # every spelling below is expected to match exactly one document
    for tag_query in (
        "@tags:{foo}",
        "@tags:{foo bar}",
        "@tags:{foo\\ bar}",
        "@tags:{hello\\;world}",
    ):
        res = client.ft().search(Query(tag_query))
        assert 1 == res.total

    q2 = client.ft().tagvals("tags")
    # list.sort() returns None, so comparing two .sort() results always
    # passes; sorted() compares the actual tag values.
    assert sorted(tags.split(",") + tags2.split(",")) == sorted(q2)
@pytest.mark.redismod
def test_textfield_sortable_nostem(client):
    """SORTABLE and NOSTEM show up in the attributes reported by info."""
    ft = client.ft()

    # Creating the index definition with sortable and no_stem
    ft.create_index((TextField("txt", sortable=True, no_stem=True),))

    # Now get the index info to confirm its contents
    first_attribute = ft.info()["attributes"][0]
    assert "SORTABLE" in first_attribute
    assert "NOSTEM" in first_attribute
@pytest.mark.redismod
def test_alter_schema_add(client):
    """A field added with ``alter_schema_add`` is searchable afterwards."""
    ft = client.ft()

    # Creating the index definition and schema
    ft.create_index(TextField("title"))

    # Using alter to add a field
    ft.alter_schema_add(TextField("body"))

    # Indexing a document
    ft.add_document(
        "doc1", title="MyTitle", body="Some content only in the body"
    )

    # Searching with parameter only in the body (the added field)
    body_only = Query("only in the body")

    # Ensure we find the result searching on the added body field
    assert 1 == ft.search(body_only).total
@pytest.mark.redismod
def test_spell_check(client):
    """Spellcheck suggestions, distance, and dictionary include/exclude."""
    ft = client.ft()
    ft.create_index((TextField("f1"), TextField("f2")))

    ft.add_document(
        "doc1", f1="some valid content", f2="this is sample text"
    )
    ft.add_document("doc2", f1="very important", f2="lorem ipsum")
    waitForIndex(client, "idx")

    # test spellcheck
    res = ft.spellcheck("impornant")
    assert "important" == res["impornant"][0]["suggestion"]

    res = ft.spellcheck("contnt")
    assert "content" == res["contnt"][0]["suggestion"]

    # test spellcheck with Levenshtein distance
    res = ft.spellcheck("vlis")
    assert res == {}
    res = ft.spellcheck("vlis", distance=2)
    assert "valid" == res["vlis"][0]["suggestion"]

    # test spellcheck include
    ft.dict_add("dict", "lore", "lorem", "lorm")
    res = ft.spellcheck("lorm", include="dict")
    candidates = res["lorm"]
    assert len(candidates) == 3
    suggestions = tuple(entry["suggestion"] for entry in candidates[:3])
    assert suggestions == ("lorem", "lore", "lorm")
    scores = (candidates[0]["score"], candidates[1]["score"])
    assert scores == ("0.5", "0")

    # test spellcheck exclude
    res = ft.spellcheck("lorm", exclude="dict")
    assert res == {}
@pytest.mark.redismod
def test_dict_operations(client):
    """Add to, delete from and dump a custom dictionary."""
    ft = client.ft()
    ft.create_index((TextField("f1"), TextField("f2")))

    # Add three items
    added = ft.dict_add("custom_dict", "item1", "item2", "item3")
    assert 3 == added

    # Remove one item
    removed = ft.dict_del("custom_dict", "item2")
    assert 1 == removed

    # Dump dict and inspect content
    remaining = ft.dict_dump("custom_dict")
    assert ["item1", "item3"] == remaining

    # Remove rest of the items before reload
    ft.dict_del("custom_dict", *remaining)
@pytest.mark.redismod
def test_phonetic_matcher(client):
    """With ``phonetic_matcher="dm:en"`` a search for Jon also finds John."""
    names = (("doc1", "Jon"), ("doc2", "John"))

    client.ft().create_index((TextField("name"),))
    for doc_id, name in names:
        client.ft().add_document(doc_id, name=name)

    res = client.ft().search(Query("Jon"))
    assert 1 == len(res.docs)
    assert "Jon" == res.docs[0].name

    # Drop and create index with phonetic matcher
    client.flushdb()

    client.ft().create_index((TextField("name", phonetic_matcher="dm:en"),))
    for doc_id, name in names:
        client.ft().add_document(doc_id, name=name)

    res = client.ft().search(Query("Jon"))
    assert 2 == len(res.docs)
    found_names = sorted([d.name for d in res.docs])
    assert ["John", "Jon"] == found_names
@pytest.mark.redismod
def test_scorer(client):
    """Score of the top "quick" hit under the default and named scorers."""
    ft = client.ft()
    ft.create_index((TextField("description"),))

    ft.add_document(
        "doc1", description="The quick brown fox jumps over the lazy dog"
    )
    ft.add_document(
        "doc2",
        description="Quick alice was beginning to get very tired of sitting by her quick sister on the bank, and of having nothing to do.",  # noqa
    )

    # default scorer is TFIDF
    res = ft.search(Query("quick").with_scores())
    assert 1.0 == res.docs[0].score

    expected_top_scores = (
        ("TFIDF", 1.0),
        ("TFIDF.DOCNORM", 0.1111111111111111),
        ("BM25", 0.17699114465425977),
        ("DISMAX", 2.0),
        ("DOCSCORE", 1.0),
        ("HAMMING", 0.0),
    )
    for scorer_name, expected in expected_top_scores:
        res = ft.search(Query("quick").scorer(scorer_name).with_scores())
        assert expected == res.docs[0].score
@pytest.mark.redismod
def test_get(client):
    """``get`` yields None per missing id and a field list per stored doc."""
    ft = client.ft()
    ft.create_index((TextField("f1"), TextField("f2")))

    # nothing has been added yet
    assert [None] == ft.get("doc1")
    assert [None, None] == ft.get("doc2", "doc1")

    ft.add_document(
        "doc1", f1="some valid content dd1", f2="this is sample text ff1"
    )
    ft.add_document(
        "doc2", f1="some valid content dd2", f2="this is sample text ff2"
    )

    doc1_fields = [
        "f1", "some valid content dd1", "f2", "this is sample text ff1"
    ]
    doc2_fields = [
        "f1", "some valid content dd2", "f2", "this is sample text ff2"
    ]
    assert [doc2_fields] == ft.get("doc2")
    assert [doc1_fields, doc2_fields] == ft.get("doc1", "doc2")
@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_config(client):
    """Set TIMEOUT, reject an invalid value, and read the setting back."""
    ft = client.ft()
    assert ft.config_set("TIMEOUT", "100")
    with pytest.raises(redis.ResponseError):
        ft.config_set("TIMEOUT", "null")

    # the value is visible both in the full dump and in a single lookup
    for option in ("*", "TIMEOUT"):
        settings = ft.config_get(option)
        assert "100" == settings["TIMEOUT"]
@pytest.mark.redismod
def test_aggregations_groupby(client):
    """
    Group three documents by ``@parent`` and check the first result row
    produced by each reducer.
    """
    # Creating the index definition and schema
    client.ft().create_index(
        (
            NumericField("random_num"),
            TextField("title"),
            TextField("body"),
            TextField("parent"),
        )
    )

    # Indexing a document
    client.ft().add_document(
        "search",
        title="RediSearch",
        body="Redisearch impements a search engine on top of redis",
        parent="redis",
        random_num=10,
    )
    client.ft().add_document(
        "ai",
        title="RedisAI",
        body="RedisAI executes Deep Learning/Machine Learning models and managing their data.",  # noqa
        parent="redis",
        random_num=3,
    )
    client.ft().add_document(
        "json",
        title="RedisJson",
        body="RedisJSON implements ECMA-404 The JSON Data Interchange Standard as a native data type.",  # noqa
        parent="redis",
        random_num=8,
    )

    def first_row(reducer):
        # run "redis" grouped by @parent with one reducer, return row 0
        req = aggregations.AggregateRequest("redis").group_by(
            "@parent", reducer
        )
        return client.ft().aggregate(req).rows[0]

    expectations = (
        (reducers.count(), "3"),
        (reducers.count_distinct("@title"), "3"),
        (reducers.count_distinctish("@title"), "3"),
        (reducers.sum("@random_num"), "21"),  # 10+8+3
        (reducers.min("@random_num"), "3"),  # min(10,8,3)
        (reducers.max("@random_num"), "10"),  # max(10,8,3)
        (reducers.avg("@random_num"), "7"),  # (10+3+8)/3
        (reducers.stddev("random_num"), "3.60555127546"),
        (reducers.quantile("@random_num", 0.5), "10"),
        (reducers.tolist("@title"), ["RediSearch", "RedisAI", "RedisJson"]),
    )
    for reducer, expected in expectations:
        res = first_row(reducer)
        assert res[1] == "redis"
        assert res[3] == expected

    res = first_row(reducers.first_value("@title").alias("first"))
    assert res == ["parent", "redis", "first", "RediSearch"]

    res = first_row(reducers.random_sample("@title", 2).alias("random"))
    assert res[1] == "redis"
    assert res[2] == "random"
    assert len(res[3]) == 2
    assert res[3][0] in ["RediSearch", "RedisAI", "RedisJson"]
@pytest.mark.redismod
def test_aggregations_sort_by_and_limit(client):
    """Aggregate with sort_by (directions, plain, max) and with limit."""
    client.ft().create_index((TextField("t1"), TextField("t2")))

    client.ft().client.hset("doc1", mapping={"t1": "a", "t2": "b"})
    client.ft().client.hset("doc2", mapping={"t1": "b", "t2": "a"})

    # test sort_by using SortDirection
    req = aggregations.AggregateRequest("*").sort_by(
        aggregations.Asc("@t2"), aggregations.Desc("@t1")
    )
    rows = client.ft().aggregate(req).rows
    assert rows[0] == ["t2", "a", "t1", "b"]
    assert rows[1] == ["t2", "b", "t1", "a"]

    # test sort_by without SortDirection
    req = aggregations.AggregateRequest("*").sort_by("@t1")
    rows = client.ft().aggregate(req).rows
    assert rows[0] == ["t1", "a"]
    assert rows[1] == ["t1", "b"]

    # test sort_by with max
    req = aggregations.AggregateRequest("*").sort_by("@t1", max=1)
    rows = client.ft().aggregate(req).rows
    assert len(rows) == 1

    # test limit
    req = aggregations.AggregateRequest("*").sort_by("@t1").limit(1, 1)
    rows = client.ft().aggregate(req).rows
    assert len(rows) == 1
    assert rows[0] == ["t1", "b"]
@pytest.mark.redismod
def test_aggregations_load(client):
    """``load`` makes the named field appear in the aggregation rows."""
    client.ft().create_index((TextField("t1"), TextField("t2")))

    client.ft().client.hset("doc1", mapping={"t1": "hello", "t2": "world"})

    # load t1, then load t2
    for field_name, value in (("t1", "hello"), ("t2", "world")):
        req = aggregations.AggregateRequest("*").load(field_name)
        res = client.ft().aggregate(req)
        assert res.rows[0] == [field_name, value]
@pytest.mark.redismod
def test_aggregations_apply(client):
    """``apply`` rewrites CreatedDateTimeUTC as ten times its value."""
    client.ft().create_index(
        (
            TextField("PrimaryKey", sortable=True),
            NumericField("CreatedDateTimeUTC", sortable=True),
        )
    )

    first_doc = {
        "PrimaryKey": "9::362330",
        "CreatedDateTimeUTC": "637387878524969984",
    }
    second_doc = {
        "PrimaryKey": "9::362329",
        "CreatedDateTimeUTC": "637387875859270016",
    }
    client.ft().client.hset("doc1", mapping=first_doc)
    client.ft().client.hset("doc2", mapping=second_doc)

    req = aggregations.AggregateRequest("*").apply(
        CreatedDateTimeUTC="@CreatedDateTimeUTC * 10"
    )
    rows = client.ft().aggregate(req).rows
    assert rows[0] == ["CreatedDateTimeUTC", "6373878785249699840"]
    assert rows[1] == ["CreatedDateTimeUTC", "6373878758592700416"]
@pytest.mark.redismod
def test_aggregations_filter(client):
    """``filter`` expressions restrict the aggregation rows."""
    client.ft().create_index(
        (
            TextField("name", sortable=True),
            NumericField("age", sortable=True),
        )
    )

    client.ft().client.hset("doc1", mapping={"name": "bar", "age": "25"})
    client.ft().client.hset("doc2", mapping={"name": "foo", "age": "19"})

    # combined condition on name and age
    req = aggregations.AggregateRequest("*").filter(
        "@name=='foo' && @age < 20"
    )
    rows = client.ft().aggregate(req).rows
    assert len(rows) == 1
    assert rows[0] == ["name", "foo", "age", "19"]

    # numeric condition followed by a sort on age
    req = (
        aggregations.AggregateRequest("*")
        .filter("@age > 15")
        .sort_by("@age")
    )
    rows = client.ft().aggregate(req).rows
    assert len(rows) == 2
    assert rows[0] == ["age", "19"]
    assert rows[1] == ["age", "25"]
@pytest.mark.redismod
@skip_ifmodversion_lt("2.0.0", "search")
def test_index_definition(client):
    """
    Create definition and test its args
    """
    prefixes = ["hset:", "henry"]

    # a plain string is not accepted as index_type
    with pytest.raises(RuntimeError):
        IndexDefinition(prefix=["hset:", "henry"], index_type="json")

    definition = IndexDefinition(
        prefix=prefixes,
        filter="@f1==32",
        language="English",
        language_field="play",
        score_field="chapter",
        score=0.5,
        payload_field="txt",
        index_type=IndexType.JSON,
    )

    expected_args = [
        "ON", "JSON",
        "PREFIX", 2, "hset:", "henry",
        "FILTER", "@f1==32",
        "LANGUAGE_FIELD", "play",
        "LANGUAGE", "English",
        "SCORE_FIELD", "chapter",
        "SCORE", 0.5,
        "PAYLOAD_FIELD", "txt",
    ]
    assert expected_args == definition.args

    createIndex(client.ft(), num_docs=500, definition=definition)
@pytest.mark.redismod
@skip_ifmodversion_lt("2.0.0", "search")
def test_create_client_definition(client):
    """
    Create definition with no index type provided,
    and use hset to test the client definition (the default is HASH).
    """
    ft = client.ft()
    definition = IndexDefinition(prefix=["hset:", "henry"])
    createIndex(ft, num_docs=500, definition=definition)

    docs_before = int(ft.info()["num_docs"])
    assert 494 == docs_before

    # a new hash under the "hset:" prefix raises the document count
    ft.client.hset("hset:1", "f1", "v1")

    docs_after = int(ft.info()["num_docs"])
    assert 495 == docs_after
@pytest.mark.redismod
@skip_ifmodversion_lt("2.0.0", "search")
def test_create_client_definition_hash(client):
    """
    Create definition with IndexType.HASH as index type (ON HASH),
    and use hset to test the client definition.
    """
    ft = client.ft()
    definition = IndexDefinition(
        prefix=["hset:", "henry"], index_type=IndexType.HASH
    )
    createIndex(ft, num_docs=500, definition=definition)

    docs_before = int(ft.info()["num_docs"])
    assert 494 == docs_before

    # a new hash under the "hset:" prefix raises the document count
    ft.client.hset("hset:1", "f1", "v1")

    docs_after = int(ft.info()["num_docs"])
    assert 495 == docs_after
@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_create_client_definition_json(client):
    """
    Build an ON JSON definition (IndexType.JSON) and exercise it through
    the json client.
    """
    json_definition = IndexDefinition(
        prefix=["king:"], index_type=IndexType.JSON
    )
    schema = (TextField("$.name"),)
    client.ft().create_index(schema, definition=json_definition)

    # Two JSON documents under the indexed prefix.
    for key, name in (("king:1", "henry"), ("king:2", "james")):
        client.json().set(key, Path.rootPath(), {"name": name})

    res = client.ft().search("henry")
    hit = res.docs[0]
    assert hit.id == "king:1"
    assert hit.payload is None
    assert hit.json == '{"name":"henry"}'
    assert res.total == 1
@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_fields_as_name(client):
    """
    JSON fields indexed with ``as_name`` can be requested and read back
    under their aliases.
    """
    # Index two JSON paths, each under an alias.
    schema = (
        TextField("$.name", sortable=True, as_name="name"),
        NumericField("$.age", as_name="just_a_number"),
    )
    client.ft().create_index(
        schema,
        definition=IndexDefinition(index_type=IndexType.JSON),
    )

    # Store one JSON document.
    payload = {"name": "Jon", "age": 25}
    assert client.json().set("doc:1", Path.rootPath(), payload)

    # Request the two aliased fields and read them off the single hit.
    query = Query("Jon").return_fields("name", "just_a_number")
    docs = client.ft().search(query).docs
    assert 1 == len(docs)
    assert "doc:1" == docs[0].id
    assert "Jon" == docs[0].name
    assert "25" == docs[0].just_a_number
@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_search_return_fields(client):
    """
    ``return_field`` with ``as_field`` exposes a JSON path under the given
    name, for a path that is in the schema ($.t) and one that is not ($.t2).
    """
    document = {
        "t": "riceratops",
        "t2": "telmatosaurus",
        "n": 9072,
        "flt": 97.2,
    }
    assert client.json().set("doc:1", Path.rootPath(), document)

    # Create a JSON index covering only $.t and $.flt.
    schema = (TextField("$.t"), NumericField("$.flt"))
    client.ft().create_index(
        schema,
        definition=IndexDefinition(index_type=IndexType.JSON),
    )
    waitForIndex(client, "idx")

    # Each path is returned as "txt" and must carry its own value.
    cases = (
        ("$.t", "riceratops"),
        ("$.t2", "telmatosaurus"),
    )
    for json_path, expected in cases:
        query = Query("*").return_field(json_path, as_field="txt")
        docs = client.ft().search(query).docs
        assert 1 == len(docs)
        assert "doc:1" == docs[0].id
        assert expected == docs[0].txt
@pytest.mark.redismod
def test_synupdate(client):
    """
    After updating synonym group id1, searching for "child" with the
    SYNONYM expander returns doc2 as the first hit.
    """
    schema = (TextField("title"), TextField("body"))
    client.ft().create_index(
        schema,
        definition=IndexDefinition(index_type=IndexType.HASH),
    )

    client.ft().synupdate("id1", True, "boy", "child", "offspring")
    client.ft().add_document(
        "doc1", title="he is a baby", body="this is a test"
    )

    # "baby" is added to group id1 only after doc1 has been stored.
    client.ft().synupdate("id1", True, "baby")
    client.ft().add_document(
        "doc2", title="he is another baby", body="another test"
    )

    res = client.ft().search(Query("child").expander("SYNONYM"))
    first = res.docs[0]
    assert first.id == "doc2"
    assert first.title == "he is another baby"
    assert first.body == "another test"
@pytest.mark.redismod
def test_syndump(client):
    """
    syndump maps every term to the ids of the synonym groups it was
    added to.
    """
    schema = (TextField("title"), TextField("body"))
    client.ft().create_index(
        schema,
        definition=IndexDefinition(index_type=IndexType.HASH),
    )

    # "child" is deliberately a member of two groups.
    groups = (
        ("id1", ("boy", "child", "offspring")),
        ("id2", ("baby", "child")),
        ("id3", ("tree", "wood")),
    )
    for group_id, terms in groups:
        client.ft().synupdate(group_id, False, *terms)

    expected = {
        "boy": ["id1"],
        "tree": ["id3"],
        "wood": ["id3"],
        "child": ["id1", "id2"],
        "baby": ["id2"],
        "offspring": ["id1"],
    }
    assert client.ft().syndump() == expected
@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_create_json_with_alias(client):
    """
    Build an ON JSON definition (IndexType.JSON) whose two fields carry
    aliases and query it through the json client.
    """
    schema = (
        TextField("$.name", as_name="name"),
        NumericField("$.num", as_name="num"),
    )
    json_definition = IndexDefinition(
        prefix=["king:"], index_type=IndexType.JSON
    )
    client.ft().create_index(schema, definition=json_definition)

    kings = (
        ("king:1", {"name": "henry", "num": 42}),
        ("king:2", {"name": "james", "num": 3.14}),
    )
    for key, value in kings:
        client.json().set(key, Path.rootPath(), value)

    # Text alias.
    res = client.ft().search("@name:henry")
    assert res.docs[0].id == "king:1"
    assert res.docs[0].json == '{"name":"henry","num":42}'
    assert res.total == 1

    # Numeric alias: only james (3.14) falls inside [0 10].
    res = client.ft().search("@num:[0 10]")
    assert res.docs[0].id == "king:2"
    assert res.docs[0].json == '{"name":"james","num":3.14}'
    assert res.total == 1

    # Querying by the raw path, which contains special characters, must
    # raise; the user is expected to go through the alias instead.
    with pytest.raises(Exception):
        client.ft().search("@$.name:henry")
@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_json_with_multipath(client):
    """
    Index the recursive JSON path ``$..name`` as a tag field (ON JSON) and
    check that the document is found through the top-level name as well as
    through the nested one.
    """
    definition = IndexDefinition(prefix=["king:"], index_type=IndexType.JSON)
    client.ft().create_index(
        # Trailing comma on purpose: pass a real one-element tuple, as the
        # other tests in this file do, not a bare field in parentheses.
        (TagField("$..name", as_name="name"),),
        definition=definition,
    )

    client.json().set(
        "king:1",
        Path.rootPath(),
        {"name": "henry", "country": {"name": "england"}},
    )
    expected_json = '{"name":"henry","country":{"name":"england"}}'

    # Both values reached by $..name must lead to the same single document.
    for query in ("@name:{henry}", "@name:{england}"):
        res = client.ft().search(query)
        assert res.docs[0].id == "king:1"
        assert res.docs[0].json == expected_json
        assert res.total == 1
@pytest.mark.redismod
@skip_ifmodversion_lt("2.2.0", "search")
def test_json_with_jsonpath(client):
    """
    Compare a key containing ":" indexed in bracket notation (alias
    ``name``) with the same key in dot notation (alias
    ``name_unsupported``).
    """
    schema = (
        TextField('$["prod:name"]', as_name="name"),
        TextField("$.prod:name", as_name="name_unsupported"),
    )
    client.ft().create_index(
        schema,
        definition=IndexDefinition(index_type=IndexType.JSON),
    )
    client.json().set("doc:1", Path.rootPath(), {"prod:name": "RediSearch"})

    # Querying the supported (bracket notation) field finds the document.
    res = client.ft().search(Query("@name:RediSearch"))
    assert res.total == 1
    assert res.docs[0].id == "doc:1"
    assert res.docs[0].json == '{"prod:name":"RediSearch"}'

    # Querying the unsupported (dot notation) field matches nothing.
    res = client.ft().search("@name_unsupported:RediSearch")
    assert res.total == 0

    # Returning the supported field exposes its value on the document.
    returning_name = Query("@name:RediSearch").return_field("name")
    res = client.ft().search(returning_name)
    assert res.total == 1
    assert res.docs[0].id == "doc:1"
    assert res.docs[0].name == "RediSearch"

    # Returning the unsupported field still yields the document, but
    # reading that attribute from it raises.
    returning_unsupported = Query("@name:RediSearch").return_field(
        "name_unsupported"
    )
    res = client.ft().search(returning_unsupported)
    assert res.total == 1
    assert res.docs[0].id == "doc:1"
    with pytest.raises(Exception):
        res.docs[0].name_unsupported
@pytest.mark.redismod
def test_profile(client):
    """
    profile returns the results together with profiling details, for a
    Query as well as for an AggregateRequest.
    """
    client.ft().create_index((TextField("t"),))
    for key, word in (("1", "hello"), ("2", "world")):
        client.ft().client.hset(key, "t", word)

    # Profile a Query.
    query = Query("hello|world").no_content()
    res, det = client.ft().profile(query)
    iterators = det["Iterators profile"]
    assert iterators["Counter"] == 2.0
    assert len(iterators["Child iterators"]) == 2
    assert iterators["Type"] == "UNION"
    assert det["Parsing time"] < 0.3
    assert len(res.docs) == 2  # the search result itself is checked too

    # Profile an AggregateRequest.
    request = (
        aggregations.AggregateRequest("*")
        .load("t")
        .apply(prefix="startswith(@t, 'hel')")
    )
    res, det = client.ft().profile(request)
    iterators = det["Iterators profile"]
    assert iterators["Counter"] == 2.0
    assert iterators["Type"] == "WILDCARD"
    assert det["Parsing time"] < 0.3
    assert len(res.rows) == 2  # the aggregation result itself is checked too
@pytest.mark.redismod
def test_profile_limited(client):
    """
    With ``limited=True`` the profile reports the child iterators of each
    union as a summary string giving their number.
    """
    client.ft().create_index((TextField("t"),))
    words = ("hello", "hell", "help", "helowa")
    for number, word in enumerate(words, start=1):
        client.ft().client.hset(str(number), "t", word)

    res, det = client.ft().profile(Query("%hell% hel*"), limited=True)
    profile = det["Iterators profile"]
    children = profile["Child iterators"]

    assert (
        children[0]["Child iterators"]
        == "The number of iterators in the union is 3"
    )
    assert (
        children[1]["Child iterators"]
        == "The number of iterators in the union is 4"
    )
    assert profile["Type"] == "INTERSECT"
    assert len(res.docs) == 3  # the search result itself is checked too