Click here to monitor SSC
SQLServerCentral is supported by Red Gate Software Ltd.
 
Log in  ::  Register  ::  Not logged in
 
 
 
        
Home       Members    Calendar    Who's On


Add to briefcase «««1314151617

A Google-like Full Text Search Expand / Collapse
Author
Message
Posted Thursday, February 23, 2012 11:59 AM
Forum Newbie

Forum NewbieForum NewbieForum NewbieForum NewbieForum NewbieForum NewbieForum NewbieForum Newbie

Group: General Forum Members
Last Login: Wednesday, April 11, 2012 10:26 AM
Points: 4, Visits: 22
Does anyone know of an unmanaged C++ version of this code?
Thanks
Post #1256899
Posted Sunday, March 11, 2012 5:40 AM
Forum Newbie

Forum NewbieForum NewbieForum NewbieForum NewbieForum NewbieForum NewbieForum NewbieForum Newbie

Group: General Forum Members
Last Login: Tuesday, March 20, 2012 11:55 PM
Points: 1, Visits: 8
Hello Mike, thanks for the great sample it's really useful.

I see it's a long-standing problem that a Google-like query can't start with a negated token.
Trying to convert the query "-key1 -key2" throws an exception.
As you've mentioned a few times earlier in this thread, there is a workaround for this.
Could you please provide a more detailed explanation of this workaround, or maybe some sample code?

Thanks in advance.
Post #1264847
Posted Thursday, July 12, 2012 8:28 AM
Forum Newbie

Forum NewbieForum NewbieForum NewbieForum NewbieForum NewbieForum NewbieForum NewbieForum Newbie

Group: General Forum Members
Last Login: Thursday, July 12, 2012 8:23 AM
Points: 1, Visits: 8
Did you find out how to solve this?
Post #1328934
Posted Friday, July 27, 2012 3:26 AM
Forum Newbie

Forum NewbieForum NewbieForum NewbieForum NewbieForum NewbieForum NewbieForum NewbieForum Newbie

Group: General Forum Members
Last Login: Tuesday, August 14, 2012 1:53 AM
Points: 1, Visits: 4
Repost from irony.codeplex.com http://irony.codeplex.com/discussions/389099

I have a question about TermType in the SearchGrammar sample. If this is the wrong forum for this, please forgive.

It seems to me that TermType will always be Inflectional for AND'ed terms, no matter what TermType you hand ConvertQuery()

Example with FTS TermType=Exact:

AND Query: classical music
Fts: ( FORMSOF (INFLECTIONAL, classical) AND FORMSOF (INFLECTIONAL, music) )

OR Query: classical | music
Fts: (classical OR music)

In line 121 of SearchGrammar.cs, the TermType is set explicitly to Inflectional for AND but not for negation or OR.

Is there a reason for this, am I missing something?
Post #1336343
Posted Tuesday, August 21, 2012 1:27 PM
Forum Newbie

Forum NewbieForum NewbieForum NewbieForum NewbieForum NewbieForum NewbieForum NewbieForum Newbie

Group: General Forum Members
Last Login: Monday, August 27, 2012 5:02 AM
Points: 1, Visits: 6
EDIT: think i might have just answered my own question! Post removed!
Post #1348061
Posted Friday, February 8, 2013 2:05 PM
SSC Rookie

SSC RookieSSC RookieSSC RookieSSC RookieSSC RookieSSC RookieSSC RookieSSC Rookie

Group: General Forum Members
Last Login: Wednesday, August 27, 2014 9:28 AM
Points: 48, Visits: 273
Hi,
I am using your code to search for words in a SQL Server table.
I have one table, candidates, which holds lakhs (hundreds of thousands) of records.
I am searching for the word in the c_resume_text field.

contains(c_resume_text,'C++') - It shows c++ results and also results other than c++.
contains(c_resume_text,'C++11') - It shows results.
contains(c_resume_text,'C++1') - It shows zero results.

In the c_resume_text field there is no word c++11, but we do have the word c++.

In the above 3rd query if i remove the 1 from it not showing any result?why?

2nd query showing results? why?

I need to change any code....here

Please help me....

/// <summary>
/// Runs a full-text CONTAINS search over candidates.c_resume_text and returns the matching rows.
/// </summary>
/// <param name="ftsQuery">
/// The full-text search condition. It is passed to CONTAINS as the @ftsQuery
/// parameter (NVARCHAR(4000)) and is never concatenated into the SQL text.
/// </param>
/// <returns>
/// A DataTable with the columns Number (row number ordered by c_id descending),
/// c_id, c_first_name and c_last_name.
/// </returns>
/// <remarks>
/// Any exception raised while building the adapter or filling the table is rethrown
/// unchanged after the partially filled table has been disposed.
/// </remarks>
public static DataTable ExecuteQuery(string ftsQuery)
{
    // The comma after c_first_name is required: without it T-SQL parses
    // "c_first_name c_last_name" as a column alias, so the result would contain
    // the first name labelled as c_last_name and no real last-name column.
    const string sql =
        "SELECT ROW_NUMBER() OVER (ORDER BY c_id desc) AS Number, " +
        " c_id, " +
        " c_first_name, " +
        " c_last_name " +
        "FROM candidates " +
        "WHERE CONTAINS(c_resume_text, @ftsQuery);";

    DataTable dt = new DataTable();
    try
    {
        // "using" disposes the adapter on both the success and the failure path.
        using (SqlDataAdapter da = new SqlDataAdapter(sql, connectionString))
        {
            da.SelectCommand.Parameters.Add("@ftsQuery", SqlDbType.NVarChar, 4000).Value = ftsQuery;
            da.Fill(dt);
        }
    }
    catch (Exception)
    {
        dt.Dispose();
        // Bare "throw" keeps the original stack trace; "throw ex" would reset it.
        throw;
    }
    return dt;
}
/// <summary>
/// Defines the grammar for Google-style search strings: terms and quoted phrases
/// combined with OR ("or", "|"), AND ("and", "&amp;", or plain juxtaposition),
/// exclusion ("-"), thesaurus ("~"), exact match ("+"), parentheses and
/// proximity lists ("&lt;" ... "&gt;").
/// </summary>
public SearchGrammar()
{
    // ---- Terminals ----
    var term = new IdentifierTerminal("Term", "!@#$%^*_'.?", "!@#$%^*_'.?0123456789");
    // Not essential, but giving Term the lowest priority lets the scanner try the
    // "or" / "and" operator symbols from the grammar first, so those words come out
    // as operator tokens rather than as Terms. The converter works either way.
    term.Priority = Terminal.LowestPriority;
    var phrase = new StringLiteral("Phrase");

    // ---- Non-terminals ----
    var orExpr = new NonTerminal("OrExpression");
    var orOp = new NonTerminal("OrOperator");
    var andExpr = new NonTerminal("AndExpression");
    var andOp = new NonTerminal("AndOperator");
    var excludeOp = new NonTerminal("ExcludeOperator");
    var primaryExpr = new NonTerminal("PrimaryExpression");
    var thesaurusExpr = new NonTerminal("ThesaurusExpression");
    var thesaurusOp = new NonTerminal("ThesaurusOperator");
    var exactOp = new NonTerminal("ExactOperator");
    var exactExpr = new NonTerminal("ExactExpression");
    var parenExpr = new NonTerminal("ParenthesizedExpression");
    var proximityExpr = new NonTerminal("ProximityExpression");
    var proximityList = new NonTerminal("ProximityList");

    // ---- Productions ----
    Root = orExpr;

    // OR is parsed above AND; both are left-recursive.
    orExpr.Rule = andExpr | orExpr + orOp + andExpr;
    orOp.Rule = Symbol("or") | "|";

    andExpr.Rule = primaryExpr | andExpr + andOp + primaryExpr;
    // An empty operator means two adjacent expressions are AND'ed implicitly.
    andOp.Rule = Empty | "and" | "&" | excludeOp;
    excludeOp.Rule = Symbol("-");

    primaryExpr.Rule = term
                     | thesaurusExpr
                     | exactExpr
                     | parenExpr
                     | phrase
                     | proximityExpr;

    thesaurusExpr.Rule = thesaurusOp + term;
    thesaurusOp.Rule = Symbol("~");

    exactExpr.Rule = exactOp + term | exactOp + phrase;
    exactOp.Rule = Symbol("+");

    parenExpr.Rule = "(" + orExpr + ")";
    proximityExpr.Rule = "<" + proximityList + ">";

    // A proximity list is one or more Terms.
    MakePlusRule(proximityList, term);

    // Brackets and parentheses are registered as punctuation.
    RegisterPunctuation("<", ">", "(", ")");
}
/// <summary>
/// Recursively converts a parsed search-expression tree into the text of a
/// SQL Server full-text CONTAINS search condition.
/// </summary>
/// <param name="node">Root of the (sub)tree to convert.</param>
/// <param name="type">
/// How bare terms are rendered: Inflectional and Thesaurus wrap the term in
/// FORMSOF (...), Exact emits the term unchanged. Operands of a plain AND are
/// always rendered as Inflectional regardless of this value, and proximity-list
/// members are always rendered as Exact.
/// </param>
/// <returns>The search-condition fragment for <paramref name="node"/>.</returns>
/// <exception cref="ApplicationException">The tree contains a node whose term name is not handled.</exception>
public static string ConvertQuery(AstNode node, TermType type)
{
    string result = "";
    // Some NonTerminals never show up in the AST because of Irony's punctuation
    // stripping and node bubbling. ParenthesizedExpression is one example: the
    // parentheses are removed as punctuation and the inner expression node
    // replaces ("bubbles up" over) its parent.
    switch (node.Term.Name)
    {
        case "OrExpression":
            result = "(" + ConvertQuery(node.ChildNodes[0], type) + " OR " +
                     ConvertQuery(node.ChildNodes[2], type) + ")";
            break;

        case "AndExpression":
            // Child 1 is the operator between the two operands.
            string opName = node.ChildNodes[1].Term.Name;
            string andop;
            if (opName == "-")
            {
                andop = " AND NOT ";
            }
            else
            {
                andop = " AND ";
                // Existing behavior, kept for compatibility: both operands of a
                // plain AND are rendered inflectionally whatever the caller asked for.
                type = TermType.Inflectional;
            }
            result = "(" + ConvertQuery(node.ChildNodes[0], type) + andop +
                     ConvertQuery(node.ChildNodes[2], type) + ")";
            break;

        case "PrimaryExpression":
            result = "(" + ConvertQuery(node.ChildNodes[0], type) + ")";
            break;

        case "ProximityList":
            // Members of a NEAR list are emitted as plain (exact) terms.
            string[] parts = new string[node.ChildNodes.Count];
            for (int i = 0; i < node.ChildNodes.Count; i++)
            {
                parts[i] = ConvertQuery(node.ChildNodes[i], TermType.Exact);
            }
            result = "(" + string.Join(" NEAR ", parts) + ")";
            break;

        case "Phrase":
            result = '"' + ((Token)node).ValueString + '"';
            break;

        case "ThesaurusExpression":
            result = " FORMSOF (THESAURUS, " +
                     ((Token)node.ChildNodes[1]).ValueString + ") ";
            break;

        case "ExactExpression":
            result = " \"" + ((Token)node.ChildNodes[1]).ValueString + "\" ";
            break;

        case "Term":
            string word = ((Token)node).ValueString;
            switch (type)
            {
                case TermType.Inflectional:
                    // A trailing '*' marks a prefix term, which is quoted and
                    // not wrapped in FORMSOF.
                    result = word.EndsWith("*", StringComparison.Ordinal)
                        ? "\"" + word + "\""
                        : " FORMSOF (INFLECTIONAL, " + word + ") ";
                    break;

                case TermType.Thesaurus:
                    // Previously unhandled: the term rendered as an empty string,
                    // yielding malformed conditions such as "( OR )". Emit the same
                    // form the "~" operator produces, with the same prefix-term rule.
                    result = word.EndsWith("*", StringComparison.Ordinal)
                        ? "\"" + word + "\""
                        : " FORMSOF (THESAURUS, " + word + ") ";
                    break;

                case TermType.Exact:
                    result = word;
                    break;
            }
            break;

        // This should never happen, even if the input string is garbage.
        default:
            throw new ApplicationException("Converter failed: unexpected term: " +
                node.Term.Name + ". Please investigate.");
    }
    return result;
}
/// <summary>
/// Term-matching mode passed to ConvertQuery as its "type" argument; it selects
/// how a bare search term is rendered in the generated full-text condition.
/// </summary>
public enum TermType
{
// Bare terms are rendered as FORMSOF (INFLECTIONAL, term); a term ending in '*' is quoted instead.
Inflectional = 1,
// Thesaurus-based matching; presumably corresponds to FORMSOF (THESAURUS, term) - confirm against ConvertQuery.
Thesaurus = 2,
// Bare terms are emitted exactly as typed.
Exact = 3
}

Thanks,
Post #1417904
Posted Friday, February 8, 2013 2:08 PM
SSC Rookie

SSC RookieSSC RookieSSC RookieSSC RookieSSC RookieSSC RookieSSC RookieSSC Rookie

Group: General Forum Members
Last Login: Wednesday, August 27, 2014 9:28 AM
Points: 48, Visits: 273
Hi,
I am using your code to search for words in a SQL Server table.
I have one table, candidates, which holds lakhs (hundreds of thousands) of records.
I am searching for the word in the c_resume_text field.

contains(c_resume_text,'C++') - It shows c++ results and also results other than c++.
contains(c_resume_text,'C++11') - It shows results.
contains(c_resume_text,'C++1') - It shows zero results.

In the c_resume_text field there is no word c++11, but we do have the word c++.

In the above 3rd query if i remove the 1 from it not showing any result?why?

2nd query showing results? why?

I need to change any code....here

Please help me....

/// <summary>
/// Runs a full-text CONTAINS search over candidates.c_resume_text and returns the matching rows.
/// </summary>
/// <param name="ftsQuery">
/// The full-text search condition. It is passed to CONTAINS as the @ftsQuery
/// parameter (NVARCHAR(4000)) and is never concatenated into the SQL text.
/// </param>
/// <returns>
/// A DataTable with the columns Number (row number ordered by c_id descending),
/// c_id, c_first_name and c_last_name.
/// </returns>
/// <remarks>
/// Any exception raised while building the adapter or filling the table is rethrown
/// unchanged after the partially filled table has been disposed.
/// </remarks>
public static DataTable ExecuteQuery(string ftsQuery)
{
    // The comma after c_first_name is required: without it T-SQL parses
    // "c_first_name c_last_name" as a column alias, so the result would contain
    // the first name labelled as c_last_name and no real last-name column.
    const string sql =
        "SELECT ROW_NUMBER() OVER (ORDER BY c_id desc) AS Number, " +
        " c_id, " +
        " c_first_name, " +
        " c_last_name " +
        "FROM candidates " +
        "WHERE CONTAINS(c_resume_text, @ftsQuery);";

    DataTable dt = new DataTable();
    try
    {
        // "using" disposes the adapter on both the success and the failure path.
        using (SqlDataAdapter da = new SqlDataAdapter(sql, connectionString))
        {
            da.SelectCommand.Parameters.Add("@ftsQuery", SqlDbType.NVarChar, 4000).Value = ftsQuery;
            da.Fill(dt);
        }
    }
    catch (Exception)
    {
        dt.Dispose();
        // Bare "throw" keeps the original stack trace; "throw ex" would reset it.
        throw;
    }
    return dt;
}
/// <summary>
/// Defines the grammar for Google-style search strings: terms and quoted phrases
/// combined with OR ("or", "|"), AND ("and", "&amp;", or plain juxtaposition),
/// exclusion ("-"), thesaurus ("~"), exact match ("+"), parentheses and
/// proximity lists ("&lt;" ... "&gt;").
/// </summary>
public SearchGrammar()
{
    // ---- Terminals ----
    var term = new IdentifierTerminal("Term", "!@#$%^*_'.?", "!@#$%^*_'.?0123456789");
    // Not essential, but giving Term the lowest priority lets the scanner try the
    // "or" / "and" operator symbols from the grammar first, so those words come out
    // as operator tokens rather than as Terms. The converter works either way.
    term.Priority = Terminal.LowestPriority;
    var phrase = new StringLiteral("Phrase");

    // ---- Non-terminals ----
    var orExpr = new NonTerminal("OrExpression");
    var orOp = new NonTerminal("OrOperator");
    var andExpr = new NonTerminal("AndExpression");
    var andOp = new NonTerminal("AndOperator");
    var excludeOp = new NonTerminal("ExcludeOperator");
    var primaryExpr = new NonTerminal("PrimaryExpression");
    var thesaurusExpr = new NonTerminal("ThesaurusExpression");
    var thesaurusOp = new NonTerminal("ThesaurusOperator");
    var exactOp = new NonTerminal("ExactOperator");
    var exactExpr = new NonTerminal("ExactExpression");
    var parenExpr = new NonTerminal("ParenthesizedExpression");
    var proximityExpr = new NonTerminal("ProximityExpression");
    var proximityList = new NonTerminal("ProximityList");

    // ---- Productions ----
    Root = orExpr;

    // OR is parsed above AND; both are left-recursive.
    orExpr.Rule = andExpr | orExpr + orOp + andExpr;
    orOp.Rule = Symbol("or") | "|";

    andExpr.Rule = primaryExpr | andExpr + andOp + primaryExpr;
    // An empty operator means two adjacent expressions are AND'ed implicitly.
    andOp.Rule = Empty | "and" | "&" | excludeOp;
    excludeOp.Rule = Symbol("-");

    primaryExpr.Rule = term
                     | thesaurusExpr
                     | exactExpr
                     | parenExpr
                     | phrase
                     | proximityExpr;

    thesaurusExpr.Rule = thesaurusOp + term;
    thesaurusOp.Rule = Symbol("~");

    exactExpr.Rule = exactOp + term | exactOp + phrase;
    exactOp.Rule = Symbol("+");

    parenExpr.Rule = "(" + orExpr + ")";
    proximityExpr.Rule = "<" + proximityList + ">";

    // A proximity list is one or more Terms.
    MakePlusRule(proximityList, term);

    // Brackets and parentheses are registered as punctuation.
    RegisterPunctuation("<", ">", "(", ")");
}
/// <summary>
/// Recursively converts a parsed search-expression tree into the text of a
/// SQL Server full-text CONTAINS search condition.
/// </summary>
/// <param name="node">Root of the (sub)tree to convert.</param>
/// <param name="type">
/// How bare terms are rendered: Inflectional and Thesaurus wrap the term in
/// FORMSOF (...), Exact emits the term unchanged. Operands of a plain AND are
/// always rendered as Inflectional regardless of this value, and proximity-list
/// members are always rendered as Exact.
/// </param>
/// <returns>The search-condition fragment for <paramref name="node"/>.</returns>
/// <exception cref="ApplicationException">The tree contains a node whose term name is not handled.</exception>
public static string ConvertQuery(AstNode node, TermType type)
{
    string result = "";
    // Some NonTerminals never show up in the AST because of Irony's punctuation
    // stripping and node bubbling. ParenthesizedExpression is one example: the
    // parentheses are removed as punctuation and the inner expression node
    // replaces ("bubbles up" over) its parent.
    switch (node.Term.Name)
    {
        case "OrExpression":
            result = "(" + ConvertQuery(node.ChildNodes[0], type) + " OR " +
                     ConvertQuery(node.ChildNodes[2], type) + ")";
            break;

        case "AndExpression":
            // Child 1 is the operator between the two operands.
            string opName = node.ChildNodes[1].Term.Name;
            string andop;
            if (opName == "-")
            {
                andop = " AND NOT ";
            }
            else
            {
                andop = " AND ";
                // Existing behavior, kept for compatibility: both operands of a
                // plain AND are rendered inflectionally whatever the caller asked for.
                type = TermType.Inflectional;
            }
            result = "(" + ConvertQuery(node.ChildNodes[0], type) + andop +
                     ConvertQuery(node.ChildNodes[2], type) + ")";
            break;

        case "PrimaryExpression":
            result = "(" + ConvertQuery(node.ChildNodes[0], type) + ")";
            break;

        case "ProximityList":
            // Members of a NEAR list are emitted as plain (exact) terms.
            string[] parts = new string[node.ChildNodes.Count];
            for (int i = 0; i < node.ChildNodes.Count; i++)
            {
                parts[i] = ConvertQuery(node.ChildNodes[i], TermType.Exact);
            }
            result = "(" + string.Join(" NEAR ", parts) + ")";
            break;

        case "Phrase":
            result = '"' + ((Token)node).ValueString + '"';
            break;

        case "ThesaurusExpression":
            result = " FORMSOF (THESAURUS, " +
                     ((Token)node.ChildNodes[1]).ValueString + ") ";
            break;

        case "ExactExpression":
            result = " \"" + ((Token)node.ChildNodes[1]).ValueString + "\" ";
            break;

        case "Term":
            string word = ((Token)node).ValueString;
            switch (type)
            {
                case TermType.Inflectional:
                    // A trailing '*' marks a prefix term, which is quoted and
                    // not wrapped in FORMSOF.
                    result = word.EndsWith("*", StringComparison.Ordinal)
                        ? "\"" + word + "\""
                        : " FORMSOF (INFLECTIONAL, " + word + ") ";
                    break;

                case TermType.Thesaurus:
                    // Previously unhandled: the term rendered as an empty string,
                    // yielding malformed conditions such as "( OR )". Emit the same
                    // form the "~" operator produces, with the same prefix-term rule.
                    result = word.EndsWith("*", StringComparison.Ordinal)
                        ? "\"" + word + "\""
                        : " FORMSOF (THESAURUS, " + word + ") ";
                    break;

                case TermType.Exact:
                    result = word;
                    break;
            }
            break;

        // This should never happen, even if the input string is garbage.
        default:
            throw new ApplicationException("Converter failed: unexpected term: " +
                node.Term.Name + ". Please investigate.");
    }
    return result;
}
/// <summary>
/// Term-matching mode passed to ConvertQuery as its "type" argument; it selects
/// how a bare search term is rendered in the generated full-text condition.
/// </summary>
public enum TermType
{
// Bare terms are rendered as FORMSOF (INFLECTIONAL, term); a term ending in '*' is quoted instead.
Inflectional = 1,
// Thesaurus-based matching; presumably corresponds to FORMSOF (THESAURUS, term) - confirm against ConvertQuery.
Thesaurus = 2,
// Bare terms are emitted exactly as typed.
Exact = 3
}

Thanks,
Post #1417905
« Prev Topic | Next Topic »

Add to briefcase «««1314151617

Permissions Expand / Collapse