How to find a string within another, ignoring some characters?
Tag : search , By : orlandoferrer
Date : January 11 2021, 05:14 PM
|
I think the issue was the following. OK, so here's my solution, including a sample to test it:
TextSearchUtil.kt
object TextSearchUtil {
/**
 * Finds the first occurrence of [query] inside [text] while comparing only the characters contained in
 * [allowedCharactersSet]. Every other character, in either string, is skipped.
 *
 * A match always starts on a character of [text] that equals the first allowed character of [query], and it
 * ends right after the last matched character. Ignored characters of [text] that lie between matched
 * characters are part of the returned range.
 *
 * @param text the text to search within. Only allowed characters are compared, the rest are ignored.
 * @param query what to search for. Only allowed characters are compared, the rest are ignored
 * (leading, inner and trailing ones alike).
 * @param allowedCharactersSet the only characters that take part in the comparison.
 * @return where the query was found: first is the start index, second is the end index (exclusive).
 * Special cases: `Pair(0, 0)` if [query] is empty or consists only of ignored characters, `null` if not found.
 */
fun findOccurrenceWhileIgnoringCharacters(text: String, query: String, allowedCharactersSet: HashSet<Char>): Pair<Int, Int>? {
    // Dropping the ignored characters up front means a query such as "8 " behaves exactly like "8",
    // even when the match ends on the very last character of the text.
    val needle = query.filter { it in allowedCharactersSet }
    if (needle.isEmpty()) {
        // query contains only ignored characters, so it's like an empty one
        return Pair(0, 0)
    }
    for (start in text.indices) {
        // needle[0] is an allowed character, so equality also guarantees text[start] is allowed
        if (text[start] != needle[0]) continue
        var matched = 1
        var cursor = start + 1
        while (matched < needle.length && cursor < text.length) {
            val c = text[cursor]
            if (c in allowedCharactersSet) {
                if (c != needle[matched]) break
                ++matched
            }
            // ignored characters of the text are simply stepped over
            ++cursor
        }
        if (matched == needle.length) {
            // cursor sits right after the last matched character, i.e. the exclusive end
            return Pair(start, cursor)
        }
    }
    return null
}
}
class MainActivity : AppCompatActivity() {
    /**
     * Runs the sample queries against [TextSearchUtil.findOccurrenceWhileIgnoringCharacters] and logs,
     * under the "AppLog" tag, whether each result equals the expected one.
     */
    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_main)

        // Sample text and the characters that take part in the comparison.
        val text = "+972 50-123-45678"
        val allowedCharacters = "01234567890+*#"
        val allowedPhoneCharactersSet = allowedCharacters.toCollection(HashSet<Char>(allowedCharacters.length))

        // query -> expected result (null means "not found")
        val tests = hashMapOf(
            "" to Pair(0, 0),
            "9" to Pair(1, 2),
            "97" to Pair(1, 3),
            "250" to Pair(3, 7),
            "250123" to Pair(3, 11),
            "250118" to null,
            "++" to null,
            "8" to Pair(16, 17),
            "+" to Pair(0, 1),
            "+8" to null,
            "78" to Pair(15, 17),
            "5678" to Pair(13, 17),
            "788" to null,
            "+ " to Pair(0, 1),
            " " to Pair(0, 0),
            "+ 5" to null,
            "+ 9" to Pair(0, 2)
        )
        for ((query, expected) in tests) {
            val result = TextSearchUtil.findOccurrenceWhileIgnoringCharacters(text, query, allowedPhoneCharactersSet)
            val isResultCorrect = result == expected
            val foundStr = result?.let { text.substring(it.first, it.second) }
            // The short form is used only for a correct "not found"; every other case also prints the found string.
            val message = if (isResultCorrect && foundStr == null)
                "checking query of \"$query\" inside \"$text\" . Succeeded?$isResultCorrect Result: $result"
            else
                "checking query of \"$query\" inside \"$text\" . Succeeded?$isResultCorrect Result: $result found String: \"$foundStr\""
            // Failures are logged as errors, everything else as debug output.
            if (isResultCorrect) Log.d("AppLog", message) else Log.e("AppLog", message)
        }

        // Queries made only of ignored characters are expected to yield Pair(0, 0).
        Log.d("AppLog", "special cases:")
        val ignoredOnlyCases = listOf("a" to "c", "ab" to "c", "ab" to "cd", "a" to "cd")
        for ((haystack, needle) in ignoredOnlyCases) {
            val found = TextSearchUtil.findOccurrenceWhileIgnoringCharacters(haystack, needle, allowedPhoneCharactersSet)
            Log.d("AppLog", "${found == Pair(0, 0)}")
        }
    }
}
val pair = TextSearchUtil.findOccurrenceWhileIgnoringCharacters(text, "2501", allowedPhoneCharactersSet)
if (pair != null) {
    // Mark the matched range with a background colour span and show the styled text.
    val (matchStart, matchEnd) = pair
    val highlighted = SpannableString(text)
    highlighted.setSpan(BackgroundColorSpan(0xFFFFFF00.toInt()), matchStart, matchEnd, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
    textView.setText(highlighted, TextView.BufferType.SPANNABLE)
} else {
    // Nothing found: show the text without any span.
    textView.text = text
}
Boards Message : |
You Must Login
Or Sign Up
to Add Your Comments . |
Share :
|
Find substring ignoring specified characters
Tag : chash , By : Kbotei
Date : March 29 2020, 07:55 AM
This might help you. Do any of you know of an easy/clean way to find a substring within a string while ignoring some specified characters? I think an example would explain things better. In your example you would do:
string input = "Hello, -this-, is a string";
// Character class matching any run (possibly empty) of the ignored characters '-' and ','.
string ignore = "[-,]*";
// The ignore pattern is inserted after every character of the searched phrase "Hello this".
Regex r = new Regex(string.Format("H{0}e{0}l{0}l{0}o{0} {0}t{0}h{0}i{0}s{0}", ignore));
Match m = r.Match(input);
// Yield the matched text (ignored characters included), or an empty string when nothing matched.
return m.Success ? m.Value : string.Empty;
/// <summary>
/// Finds <paramref name="query"/> inside <paramref name="input"/> while allowing any number of the
/// characters in <paramref name="ignorelist"/> to appear after each query character.
/// </summary>
/// <param name="query">The text to look for; every character is matched literally.</param>
/// <param name="input">The text to search in.</param>
/// <param name="ignorelist">Characters that may be interleaved with the query characters in the input.</param>
/// <returns>
/// The matched part of <paramref name="input"/> (ignored characters included), or an empty string when
/// there is no match, the query is null/empty, or the input is null.
/// </returns>
public string Test(string query, string input, char[] ignorelist)
{
    if (string.IsNullOrEmpty(query) || input == null)
    {
        return string.Empty;
    }

    // Build "(?:a|b|...)*" from the ignored characters. An alternation of escaped single characters is
    // used instead of a character class so that '-', ']' and '^' need no special positioning.
    string ignorePattern = string.Empty;
    if (ignorelist != null && ignorelist.Length > 0)
    {
        string[] alternatives = new string[ignorelist.Length];
        for (int i = 0; i < ignorelist.Length; i++)
        {
            alternatives[i] = Regex.Escape(ignorelist[i].ToString());
        }
        ignorePattern = "(?:" + string.Join("|", alternatives) + ")*";
    }

    // Each query character (escaped, so regex metacharacters are matched literally) is followed by the
    // optional run of ignored characters.
    string pattern = string.Empty;
    for (int i = 0; i < query.Length; i++)
    {
        pattern += Regex.Escape(query[i].ToString()) + ignorePattern;
    }

    Match m = Regex.Match(input, pattern);
    return m.Success ? m.Value : string.Empty;
}
|
python find position of characters in a string ignoring special characters in this string
Tag : string , By : Lucas Thompson
Date : March 29 2020, 07:55 AM
# this one helps. I solved it by counting the number of occurrences of the character in s1 at position i
# and then finding the character s1[i] in s2 that has the same number of occurrences.
def find_nth(needle, haystack, n):
    """Return the index of the n-th (1-based) occurrence of ``needle`` in ``haystack``.

    Each search resumes after the end of the previous hit, so occurrences do not overlap.
    Returns -1 when ``haystack`` holds fewer than ``n`` occurrences. For ``n <= 1`` the
    index of the first occurrence (or -1) is returned.
    """
    start = haystack.find(needle)
    while start >= 0 and n > 1:
        # continue searching right after the occurrence found so far
        start = haystack.find(needle, start + len(needle))
        n -= 1
    return start
# NOTE(review): s1 and s2 are not defined in this snippet; presumably s1 is the plain string and s2 the
# same text with extra (special) characters in it. Confirm against the caller.
for i in range(len(s1)):
    # how many times s1[i] has occurred in s1 up to and including position i
    occurrence = s1[:i + 1].count(s1[i])
    # index in s2 of the occurrence with that same ordinal (-1 if s2 has fewer occurrences)
    j = find_nth(s1[i], s2, occurrence)
|
How to find a sequence of chars (ignoring characters in between) in a string (JAVA)
Tag : java , By : user152423
Date : March 29 2020, 07:55 AM
fixed the issue. Will look into that further I found a really fast way of doing this. The array Sa stores at index i, how often the sequence SUN is contained in the substring of the input string starting at i. Ua stores how often the sequence UN is contained. And Na stores how often N is contained. This is a form of memoization https://en.wikipedia.org/wiki/Memoizationpublic static long getDegree(String sequence){
if(sequence.length() == 0) return 0;
long degree = 0;
char[] array = sequence.toCharArray();
long[] Sa = new long[array.length];
long[] Ua = new long[array.length];
long[] Na = new long[array.length];
if(array[array.length - 1] == 'N') Na[array.length - 1] = 1;
else Na[array.length - 1] = 0;
Ua[array.length - 1] = 0;
Sa[array.length - 1] = 0;
for(int i = array.length - 2; i >= 0; i--) {
char c = array[i];
Na[i] = Na[i + 1] + (c == 'N' ? 1 : 0);
Ua[i] = Ua[i + 1] + (c == 'U' ? Na[i + 1] : 0);
Sa[i] = Sa[i + 1] + (c == 'S' ? Ua[i + 1] : 0);
}
return Sa[0];
}
/**
 * Counts how often the letters 'S', 'U', 'N' occur in this order (not necessarily adjacent) in
 * {@code sequence}, scanning from the end and keeping three running totals.
 *
 * @param sequence the string to inspect
 * @return the number of (S, U, N) position triples in increasing order; 0 for an empty string
 */
public static long getDegree(String sequence){
    long sunCount = 0; // "SUN" occurrences within the part scanned so far
    long unCount = 0;  // "UN" occurrences within the part scanned so far
    long nCount = 0;   // 'N' characters within the part scanned so far
    for (int pos = sequence.length() - 1; pos >= 0; pos--) {
        switch (sequence.charAt(pos)) {
            case 'S':
                sunCount += unCount;
                break;
            case 'U':
                unCount += nCount;
                break;
            case 'N':
                nCount++;
                break;
            default:
                // any other character does not affect the counts
                break;
        }
    }
    return sunCount;
}
|
How to find next 9 characters after a string ignoring special characters?
Tag : python , By : inquiringmind
Date : March 29 2020, 07:55 AM
this will help Here is a simple approach to first find the intended text using this regex, \b(?:NRC|AZN|BSA|SSR)(?:.?\d)+
import re
# Sample text holding the codes to extract, written with several different separators.
s = 'This is a sample text NRC234456789 and this is another case AZN.1.Z.3.4.S.6.7.8.9 and this another case BSA 123 456 789 and BSA 123 456 789 123 456 final case SSR/789456123'
list_comb = ['NRC', 'AZN', 'BSA', 'SSR']
# One of the known prefixes, followed by one or more digits/capital letters,
# each optionally preceded by a single separator character of any kind.
regex = r'\b(?:{})(?:.?[\dA-Z])+'.format('|'.join(list_comb))
print(regex)
for m in re.findall(regex, s):
    # drop every separator so only letters and digits remain
    m = re.sub(r'[^a-zA-Z0-9]+', '', m)
    # split into the 3-character prefix and the 9 characters that follow it
    mat = re.search(r'^(.{3})(.{9})', m)
    if mat:
        s1 = mat.group(1)
        # 'S' and 'Z' inside the number part are replaced by the digits 5 and 2
        s2 = mat.group(2).replace('S','5').replace('Z','2')
        print(s1+s2)
NRC234456789
AZN123456789
BSA123456789
BSA123456789
SSR789456123
|
Efficiently find whether a string contains a group of characters (like substring but ignoring order)?
Date : March 29 2020, 07:55 AM
|
|
|
Related QUESTIONS :
|