Best algorithm to index sentences
Date : March 29 2020, 07:55 AM
Question: Imagine I have a situation where I need to index sentences. Let me explain it a little bit deeper. Answer: This ought to get you close, in C#:
class Program
{
/// <summary>
/// A single word (term) in the sentence index. For every phrase that passes
/// through this word, the node remembers which node came before it and which
/// node comes after it in that phrase.
/// </summary>
public class Node
{
    private readonly string _term;

    // phrase -> every (previous, next) link this node has inside that phrase.
    // A list is required because one phrase can visit the same node more than
    // once (e.g. "the the sky"), and the same phrase can be indexed twice; a
    // single-valued Dictionary.Add would throw ArgumentException in both cases.
    private readonly Dictionary<string, List<KeyValuePair<Node, Node>>> _related =
        new Dictionary<string, List<KeyValuePair<Node, Node>>>();

    /// <summary>Creates a node for the given term.</summary>
    /// <param name="term">The word this node stands for.</param>
    public Node(string term)
    {
        _term = term;
    }

    /// <summary>
    /// Records this node as part of <paramref name="phrase"/> and chains the
    /// remaining words of the phrase after it, creating nodes for words not
    /// yet present in <paramref name="existing"/>.
    /// </summary>
    /// <param name="phrase">Key under which the links are stored.</param>
    /// <param name="previous">Node preceding this one in the phrase, or null for the first word.</param>
    /// <param name="phraseRemainder">Words that follow this node in the phrase; may be empty.</param>
    /// <param name="existing">Word-to-node lookup shared by the whole index; new nodes are added to it.</param>
    public void Add(string phrase, Node previous, string [] phraseRemainder, Dictionary<string,Node> existing)
    {
        AddFrom(phrase, previous, phraseRemainder, 0, existing);
    }

    // Same as Add, but walks the word array by offset instead of copying the
    // tail of the array at every level of the recursion.
    private void AddFrom(string phrase, Node previous, string[] words, int offset, Dictionary<string, Node> existing)
    {
        Node next = null;
        if (offset < words.Length)
        {
            string word = words[offset];
            if (!existing.TryGetValue(word, out next))
            {
                existing[word] = next = new Node(word);
            }
            next.AddFrom(phrase, this, words, offset + 1, existing);
        }

        // Links are recorded after the rest of the phrase has been chained,
        // matching the order in which the original implementation stored them.
        List<KeyValuePair<Node, Node>> links;
        if (!_related.TryGetValue(phrase, out links))
        {
            _related[phrase] = links = new List<KeyValuePair<Node, Node>>();
        }
        links.Add(new KeyValuePair<Node, Node>(previous, next));
    }
}
/// <summary>
/// Demo entry point: builds a word index from three sample sentences.
/// Words are lower-cased and split on single spaces; the original-cased
/// sentence is used as the phrase key handed to <c>Node.Add</c>.
/// </summary>
static void Main(string[] args)
{
    var nodesByWord = new Dictionary<string, Node>();
    string[] inputs =
    {
        "The beautiful sky",
        "Beautiful sky dream",
        "beautiful dream"
    };

    for (int i = 0; i < inputs.Length; i++)
    {
        string original = inputs[i];
        string[] tokens = original.ToLowerInvariant().Split(' ');
        string head = tokens[0];

        // Reuse the node for the first word if one exists, otherwise register a new one.
        Node first;
        if (!nodesByWord.TryGetValue(head, out first))
        {
            first = new Node(head);
            nodesByWord[head] = first;
        }

        // Single-word sentences only register their node; nothing is chained.
        if (tokens.Length <= 1)
        {
            continue;
        }

        string[] rest = tokens.Skip(1).ToArray();
        first.Add(original, null, rest, nodesByWord);
    }
}
}
|
Date : March 29 2020, 07:55 AM
To fix the issue you can do Is there an algorithm that can be used to extract simple sentences from paragraphs? , I have just used openNLP for the same.
/**
 * Splits a paragraph into sentences using OpenNLP's {@code SentenceDetectorME}.
 * The sentence model is read from {@code resources/models/en-sent.bin} on every call.
 *
 * @param paragraph text to split
 * @return the detected sentences, in the order returned by the detector
 * @throws FileNotFoundException if the model file cannot be opened
 * @throws IOException if reading the model fails
 * @throws InvalidFormatException declared by the original signature; presumably raised for a malformed model
 */
public static List<String> breakIntoSentencesOpenNlp(String paragraph) throws FileNotFoundException, IOException,
        InvalidFormatException {
    // try-with-resources closes the stream even when model loading or
    // detection throws; the explicit close() it replaces was skipped on errors.
    try (InputStream is = new FileInputStream("resources/models/en-sent.bin")) {
        SentenceModel model = new SentenceModel(is);
        SentenceDetectorME sdetector = new SentenceDetectorME(model);
        String[] sentDetect = sdetector.sentDetect(paragraph);
        return Arrays.asList(sentDetect);
    }
}
//Failed at Hi.
paragraph = "Hi. How are you? This is Mike.";
SentenceDetector.breakIntoSentencesOpenNlp(paragraph).forEach(sentence -> System.out.println(sentence));
//Failed at Door.Noone
paragraph = "Close the Door.Noone is out there";
SentenceDetector.breakIntoSentencesOpenNlp(paragraph).forEach(sentence -> System.out.println(sentence));//not able to break on noone
paragraph = "Really!! I cant believe. Mr. Wilson can come any moment to receive mrs. watson.";
SentenceDetector.breakIntoSentencesOpenNlp(paragraph).forEach(sentence -> System.out.println(sentence));
//Failed at dr.
paragraph = "Radhika, Mohan, and Shaik went to meet dr. Kashyap to raise fund for poor patients.";
SentenceDetector.breakIntoSentencesOpenNlp(paragraph).forEach(sentence -> System.out.println(sentence));//breaking on dr.
paragraph = "This is how I tried to split a paragraph into a sentence. But, there is a problem. My paragraph includes dates like Jan.13, 2014 , words like U.S. and numbers like 2.2. They all got splitted by the above code.";
SentenceDetector.breakIntoSentencesOpenNlp(paragraph).forEach(sentence -> System.out.println(sentence));//breaking on dr.
paragraph = "www.thinkzarahatke.com is the second site I developed. You can send mail to admin@thinkzarahatke.com";
SentenceDetector.breakIntoSentencesOpenNlp(paragraph).forEach(sentence -> System.out.println(sentence));
// Sentence-matching expression, compiled once instead of on every call.
// The pattern text contains no literal whitespace or '#', so the COMMENTS
// flag kept from the original does not alter what it matches.
private static final Pattern SENTENCE_PATTERN = Pattern.compile(
        "[^.!?\\s][^.!?]*(?:[.!?](?!['\"]?\\s|$)[^.!?]*)*[.!?]?['\"]?(?=\\s|$)",
        Pattern.MULTILINE | Pattern.COMMENTS);

/**
 * Splits a paragraph into sentences with a regular expression.
 * A sentence starts at a character that is neither whitespace nor one of
 * {@code . ! ?} and ends at a terminator (optionally followed by a quote)
 * that is followed by whitespace or end of line; a terminator directly
 * followed by other text (as in "Door.Noone") does not end the sentence.
 *
 * @param paragraph text to split
 * @return every match of the sentence pattern, in text order
 */
public static List<String> breakIntoSentencesCustomRESplitter(String paragraph){
    List<String> sentences = new ArrayList<>();
    Matcher matcher = SENTENCE_PATTERN.matcher(paragraph);
    while (matcher.find()) {
        sentences.add(matcher.group());
    }
    return sentences;
}
paragraph = "Hi. How are you? This is Mike.";
SentenceDetector.breakIntoSentencesCustomRESplitter(paragraph).forEach(sentence -> System.out.println(sentence));
//Failed at Door.Noone
paragraph = "Close the Door.Noone is out there";
SentenceDetector.breakIntoSentencesCustomRESplitter(paragraph).forEach(sentence -> System.out.println(sentence));
//Failed at Mr., mrs.
paragraph = "Really!! I cant believe. Mr. Wilson can come any moment to receive mrs. watson.";
SentenceDetector.breakIntoSentencesCustomRESplitter(paragraph).forEach(sentence -> System.out.println(sentence));
//Failed at dr.
paragraph = "Radhika, Mohan, and Shaik went to meet dr. Kashyap to raise fund for poor patients.";
SentenceDetector.breakIntoSentencesCustomRESplitter(paragraph).forEach(sentence -> System.out.println(sentence));
//Failed at U.S.
paragraph = "This is how I tried to split a paragraph into a sentence. But, there is a problem. My paragraph includes dates like Jan.13, 2014 , words like U.S. and numbers like 2.2. They all got splitted by the above code.";
SentenceDetector.breakIntoSentencesCustomRESplitter(paragraph).forEach(sentence -> System.out.println(sentence));
paragraph = "www.thinkzarahatke.com is the second site I developed. You can send mail to admin@thinkzarahatke.com";
SentenceDetector.breakIntoSentencesCustomRESplitter(paragraph).forEach(sentence -> System.out.println(sentence));
/**
 * Splits a paragraph into sentences using {@link BreakIterator} with English rules.
 * Each returned element is the text between two consecutive sentence
 * boundaries, so concatenating the list reproduces the paragraph exactly
 * (whitespace that falls between boundaries is kept, not trimmed).
 *
 * @param paragraph text to split
 * @return the sentences in the order they appear in the text; empty for an empty paragraph
 */
public static List<String> breakIntoSentencesBreakIterator(String paragraph){
    List<String> sentences = new ArrayList<>();
    // Use the English iterator directly. The original called the static
    // getSentenceInstance() through this instance, which silently produced a
    // second iterator for the default locale and ignored Locale.ENGLISH.
    BreakIterator sentenceIterator = BreakIterator.getSentenceInstance(Locale.ENGLISH);
    sentenceIterator.setText(paragraph);
    // Walk forward so results come out in text order, like the other splitters;
    // the original walked backward from last() and returned them reversed.
    int start = sentenceIterator.first();
    for (int end = sentenceIterator.next();
            end != BreakIterator.DONE;
            start = end, end = sentenceIterator.next()) {
        sentences.add(paragraph.substring(start, end));
    }
    return sentences;
}
paragraph = "Hi. How are you? This is Mike.";
SentenceDetector.breakIntoSentencesBreakIterator(paragraph).forEach(sentence -> System.out.println(sentence));
//Failed at Door.Noone
paragraph = "Close the Door.Noone is out there";
SentenceDetector.breakIntoSentencesBreakIterator(paragraph).forEach(sentence -> System.out.println(sentence));
//Failed at Mr.
paragraph = "Really!! I cant believe. Mr. Wilson can come any moment to receive mrs. watson.";
SentenceDetector.breakIntoSentencesBreakIterator(paragraph).forEach(sentence -> System.out.println(sentence));
//Failed at dr.
paragraph = "Radhika, Mohan, and Shaik went to meet dr. Kashyap to raise fund for poor patients.";
SentenceDetector.breakIntoSentencesBreakIterator(paragraph).forEach(sentence -> System.out.println(sentence));
paragraph = "This is how I tried to split a paragraph into a sentence. But, there is a problem. My paragraph includes dates like Jan.13, 2014 , words like U.S. and numbers like 2.2. They all got splitted by the above code.";
SentenceDetector.breakIntoSentencesBreakIterator(paragraph).forEach(sentence -> System.out.println(sentence));
paragraph = "www.thinkzarahatke.com is the second site I developed. You can send mail to admin@thinkzarahatke.com";
SentenceDetector.breakIntoSentencesBreakIterator(paragraph).forEach(sentence -> System.out.println(sentence));
|
Negating sentences using POS-tagging
Date : March 29 2020, 07:55 AM
I hope this helps you. Question: I'm trying to find a way to negate sentences based on POS-tagging. Please consider: , Answer: Give this a try: $sentence = preg_replace("/(\s)(?:(?!never|neither|not)(\w*))\/(JJ|MD|RB|VB|VBD|VBN)\b/", "$1not$2", $sentence);
|
An Algorithm to Determine How Similar Two Sentences Are
Date : March 29 2020, 07:55 AM
|
Is negating inequality with NULL different from negating?
Tag : c , By : Vorinowsky
Date : March 29 2020, 07:55 AM
I hope this helps. NULL is guaranteed to be a null pointer constant, which is not to be confused with a null pointer (which could in theory have a non-zero representation). A null pointer constant can be defined as either (void*)0 or 0. Both !(void*)0 and !0 evaluate to 1, so either version is guaranteed to work.
|