logo
down
shadow

How to find a string within another, ignoring some characters?


How to find a string within another, ignoring some characters?

Content Index :

How to find a string within another, ignoring some characters?
Tag : search , By : orlandoferrer
Date : January 11 2021, 05:14 PM

I think the issue was caused by the following. OK, so here's my solution, including a sample to test it:
TextSearchUtil.kt
object TextSearchUtil {
    /**
     * Finds the first occurrence of [query] inside [text], comparing only the characters that are
     * contained in [allowedCharactersSet]. Every other character, in both [text] and [query], is skipped.
     *
     * A match always starts on an allowed character of [text] and ends right after the last matched
     * character, so ignored characters that follow the match are not part of the returned range.
     * Ignored characters at the end of [query] never prevent a match, even when the match ends on the
     * last character of [text].
     *
     * @param text the text to search within. Only allowed characters are compared, the rest are ignored
     * @param query what to search for. Only allowed characters are compared, the rest are ignored
     * @param allowedCharactersSet the only characters that take part in the comparison
     * @return where the query was found: first is the start index, second is the end index (exclusive).
     * Special cases: Pair(0,0) if the query is empty or consists only of ignored characters,
     * null if the query was not found.
     */
    fun findOccurrenceWhileIgnoringCharacters(text: String, query: String, allowedCharactersSet: HashSet<Char>): Pair<Int, Int>? {
        // Ignored characters of the query never take part in the comparison, so drop them once up front.
        val significantQuery = query.filter { it in allowedCharactersSet }
        if (significantQuery.isEmpty()) {
            // query contains only ignored characters (or nothing), so it's like an empty one
            return Pair(0, 0)
        }
        val firstChar = significantQuery[0]
        for (start in text.indices) {
            // firstChar is an allowed character, so this also rejects ignored characters as a match start
            if (text[start] != firstChar)
                continue
            val end = findMatchEnd(text, start, significantQuery, allowedCharactersSet)
            if (end != null)
                return Pair(start, end)
        }
        return null
    }

    /**
     * Tries to match [significantQuery] (which holds allowed characters only) against [text],
     * starting at [start] and skipping ignored characters of [text].
     *
     * @return the exclusive end index of the match, or null if the query does not match from [start].
     */
    private fun findMatchEnd(text: String, start: Int, significantQuery: String, allowedCharactersSet: HashSet<Char>): Int? {
        var queryIndex = 0
        for (textIndex in start until text.length) {
            val c = text[textIndex]
            if (c !in allowedCharactersSet)
                continue
            if (c != significantQuery[queryIndex])
                return null
            ++queryIndex
            if (queryIndex == significantQuery.length)
                return textIndex + 1
        }
        // reached the end of the text before the whole query was matched
        return null
    }
}
/**
 * Sample activity that exercises [TextSearchUtil.findOccurrenceWhileIgnoringCharacters] against a
 * phone-number-like text and writes the outcome of every check to the log under the "AppLog" tag.
 */
class MainActivity : AppCompatActivity() {

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_main)

        val text = "+972 50-123-45678"
        val allowedCharacters = "01234567890+*#"
        // only these characters are compared by the search, anything else is skipped
        val allowedPhoneCharactersSet: HashSet<Char> = allowedCharacters.toHashSet()

        // query -> expected range (start, end exclusive); null means "should not be found"
        val tests = hashMapOf(
                "" to Pair(0, 0),
                "9" to Pair(1, 2),
                "97" to Pair(1, 3),
                "250" to Pair(3, 7),
                "250123" to Pair(3, 11),
                "250118" to null,
                "++" to null,
                "8" to Pair(16, 17),
                "+" to Pair(0, 1),
                "+8" to null,
                "78" to Pair(15, 17),
                "5678" to Pair(13, 17),
                "788" to null,
                "+ " to Pair(0, 1),
                "  " to Pair(0, 0),
                "+ 5" to null,
                "+ 9" to Pair(0, 2)
        )
        for (test in tests) {
            val result = TextSearchUtil.findOccurrenceWhileIgnoringCharacters(text, test.key, allowedPhoneCharactersSet)
            val isResultCorrect = test.value == result
            // the matched part of the text, or null when nothing was found
            val foundStr = result?.let { text.substring(it.first, it.second) }
            if (!isResultCorrect) {
                // a mismatch is reported as an error, all other outcomes as debug messages
                Log.e("AppLog", "checking query of \"${test.key}\" inside \"$text\" . Succeeded?$isResultCorrect Result: $result found String: \"$foundStr\"")
                continue
            }
            if (foundStr == null)
                Log.d("AppLog", "checking query of \"${test.key}\" inside \"$text\" . Succeeded?$isResultCorrect Result: $result")
            else
                Log.d("AppLog", "checking query of \"${test.key}\" inside \"$text\" . Succeeded?$isResultCorrect Result: $result found String: \"$foundStr\"")
        }

        // queries made only of ignored characters are expected to produce Pair(0, 0)
        Log.d("AppLog", "special cases:")
        val ignoredOnlyCases = listOf("a" to "c", "ab" to "c", "ab" to "cd", "a" to "cd")
        for ((caseText, caseQuery) in ignoredOnlyCases) {
            val isEmptyRange = TextSearchUtil.findOccurrenceWhileIgnoringCharacters(caseText, caseQuery, allowedPhoneCharactersSet) == Pair(0, 0)
            Log.d("AppLog", isEmptyRange.toString())
        }
    }

}
    // Search for "2501" and, when it is found, show the text with the matched range highlighted.
    val pair = TextSearchUtil.findOccurrenceWhileIgnoringCharacters(text, "2501", allowedPhoneCharactersSet)
    if (pair != null) {
        val (matchStart, matchEnd) = pair
        val highlighted = SpannableString(text)
        // the span covers exactly the range reported by the search (end is exclusive)
        highlighted.setSpan(BackgroundColorSpan(0xFFFFFF00.toInt()), matchStart, matchEnd, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE)
        textView.setText(highlighted, TextView.BufferType.SPANNABLE)
    } else {
        // nothing found: show the plain text
        textView.text = text
    }

Comments
No Comments Right Now !

Boards Message :
You Must Login Or Sign Up to Add Your Comments .

Share : facebook icon twitter icon

Find substring ignoring specified characters


Tag : chash , By : Kbotei
Date : March 29 2020, 07:55 AM
This might help you. Do any of you know of an easy/clean way to find a substring within a string while ignoring some specified characters? I think an example would explain things better. In your example you would do:
string input = "Hello, -this-, is a string";
// Zero or more of the characters to skip ('-' and ','), allowed after every letter of the query.
string ignore = "[-,]*";
string pattern = string.Format("H{0}e{0}l{0}l{0}o{0} {0}t{0}h{0}i{0}s{0}", ignore);
Match m = new Regex(pattern).Match(input);
if (!m.Success)
{
    // no occurrence of the query in the input
    return string.Empty;
}
return m.Value;
/// <summary>
/// Finds the first occurrence of <paramref name="query"/> inside <paramref name="input"/>,
/// allowing any number of the characters in <paramref name="ignorelist"/> to appear after each
/// character of the query.
/// </summary>
/// <param name="query">The text to look for; every character is matched literally.</param>
/// <param name="input">The text to search in.</param>
/// <param name="ignorelist">Characters that may be interleaved with the query; may be null or empty.</param>
/// <returns>The matched part of <paramref name="input"/> (including the interleaved ignored
/// characters), or an empty string when there is no match.</returns>
public string Test(string query, string input, char[] ignorelist)
{
    // Build "[...]*" from the ignore list. An empty list must produce no class at all,
    // because "[]*" is not a valid pattern.
    string ignorePattern = string.Empty;
    if (ignorelist != null && ignorelist.Length > 0)
    {
        string members = string.Empty;
        for (int i = 0; i < ignorelist.Length; i++)
        {
            char c = ignorelist[i];
            // These characters have a special meaning inside a character class, so escape them.
            if (c == '\\' || c == ']' || c == '[' || c == '^' || c == '-')
            {
                members += "\\";
            }
            members += c;
        }
        ignorePattern = "[" + members + "]*";
    }

    // Each query character is matched literally and may be followed by ignored characters.
    string pattern = string.Empty;
    for (int i = 0; i < query.Length; i++)
    {
        pattern += Regex.Escape(query[i].ToString()) + ignorePattern;
    }

    Regex r = new Regex(pattern);
    Match m = r.Match(input);
    return m.Success ? m.Value : string.Empty;
}

python find position of characters in a string ignoring special characters in this string


Tag : string , By : Lucas Thompson
Date : March 29 2020, 07:55 AM
This one helps. I solved it by counting the number of occurrences of the character in s1 up to position i, and then finding the character s1[i] in s2 that has the same occurrence number.
def find_nth(needle, haystack, n):
    """Return the index of the n-th non-overlapping occurrence of needle in haystack.

    Occurrences are counted from 1; any n of 1 or less yields the first occurrence.
    Returns -1 when haystack holds fewer than n occurrences.
    """
    step = len(needle)
    position = haystack.find(needle)
    remaining = n - 1
    # Hop from one occurrence to the next until enough were seen or none is left.
    while remaining > 0 and position != -1:
        position = haystack.find(needle, position + step)
        remaining -= 1
    return position


# For each position i of s1, find the index j in s2 of the same-numbered occurrence of s1[i].
for i, ch in enumerate(s1):
    # how many times ch has appeared in s1 up to and including position i
    occurrence = s1[:i + 1].count(ch)
    # index in s2 of that occurrence of ch (find_nth returns -1 when s2 has too few)
    j = find_nth(ch, s2, occurrence)

How to find a sequence of chars (ignoring characters in between) in a string (JAVA)


Tag : java , By : user152423
Date : March 29 2020, 07:55 AM
This fixed the issue; I will look into it further. I found a really fast way of doing this. The array Sa stores, at index i, how often the sequence SUN is contained in the substring of the input string starting at i. Ua stores how often the sequence UN is contained, and Na stores how often N is contained. This is a form of memoization: https://en.wikipedia.org/wiki/Memoization
/**
 * Counts how many times the letters S, U, N occur in this order (not necessarily adjacent)
 * in the given sequence, using one suffix table per partial sequence.
 *
 * @param sequence the text to analyse
 * @return the number of S..U..N combinations; 0 for an empty sequence
 */
public static long getDegree(String sequence){
  final int length = sequence.length();
  if (length == 0) {
    return 0;
  }

  // Entry i describes the suffix starting at i; the extra entry at index length is the
  // empty suffix and stays 0, so the last character needs no special handling.
  long[] sunFrom = new long[length + 1];
  long[] unFrom = new long[length + 1];
  long[] nFrom = new long[length + 1];

  for (int i = length - 1; i >= 0; i--) {
    final char current = sequence.charAt(i);
    final int next = i + 1;
    // an N adds one more "N"
    nFrom[i] = nFrom[next] + (current == 'N' ? 1 : 0);
    // a U starts a "UN" with every N that follows it
    unFrom[i] = unFrom[next] + (current == 'U' ? nFrom[next] : 0);
    // an S starts a "SUN" with every "UN" that follows it
    sunFrom[i] = sunFrom[next] + (current == 'S' ? unFrom[next] : 0);
  }
  return sunFrom[0];
}
/**
 * Counts how many times the letters S, U, N occur in this order (not necessarily adjacent)
 * in the given sequence, keeping only three running totals instead of per-index tables.
 *
 * @param sequence the text to analyse
 * @return the number of S..U..N combinations; 0 for an empty sequence
 */
public static long getDegree(String sequence){
  if (sequence.isEmpty()) {
    return 0;
  }

  // Totals for the part of the sequence to the right of the current position.
  long sunCount = 0;
  long unCount = 0;
  long nCount = 0;

  for (int i = sequence.length() - 1; i >= 0; i--) {
    switch (sequence.charAt(i)) {
      case 'S':
        // an S starts a "SUN" with every "UN" to its right
        sunCount += unCount;
        break;
      case 'U':
        // a U starts a "UN" with every N to its right
        unCount += nCount;
        break;
      case 'N':
        nCount++;
        break;
      default:
        // any other character does not contribute
        break;
    }
  }
  return sunCount;
}

How to find next 9 characters after a string ignoring special characters?


Tag : python , By : inquiringmind
Date : March 29 2020, 07:55 AM
This will help. Here is a simple approach: first find the intended text using this regex,
\b(?:NRC|AZN|BSA|SSR)(?:.?\d)+
import re

s = 'This is a sample text NRC234456789 and this is another case AZN.1.Z.3.4.S.6.7.8.9 and this another case BSA 123 456 789 and BSA 123 456 789 123 456 final case SSR/789456123'

# Prefixes that mark the start of a code.
list_comb = ['NRC', 'AZN', 'BSA', 'SSR']
# A prefix followed by digits/capital letters, each optionally preceded by one separator character.
regex = r'\b(?:{})(?:.?[\dA-Z])+'.format('|'.join(list_comb))
print(regex)

non_alphanumeric = re.compile(r'[^a-zA-Z0-9]+')
prefix_and_nine = re.compile(r'^(.{3})(.{9})')

for raw in re.findall(regex, s):
    # strip the separators, leaving letters and digits only
    m = non_alphanumeric.sub('', raw)
    # split into the 3-character prefix and the 9 characters after it
    mat = prefix_and_nine.search(m)
    if not mat:
        continue
    s1 = mat.group(1)
    # within the 9 characters, S and Z are turned into the digits 5 and 2
    s2 = mat.group(2).replace('S', '5').replace('Z', '2')
    print(s1 + s2)
NRC234456789
AZN123456789
BSA123456789
BSA123456789
SSR789456123

Efficiently find whether a string contains a group of characters (like substring but ignoring order)?


Tag : python , By : BinaryBoy
Date : March 29 2020, 07:55 AM
shadow
Privacy Policy - Terms - Contact Us © scrbit.com