how can i split a string on every kth occurence of a space but with overlap

  • Last Update :
  • Techknowledgy :
>>> line = '1234567890'
>>> n = 2
>>> [line[i:i + n] for i in range(0, len(line), n)]
['12', '34', '56', '78', '90']

Just to be complete, you can do this with a regex:

>>> import re
>>> re.findall('..', '1234567890')
['12', '34', '56', '78', '90']

For odd number of chars you can do this:

>>> import re
>>> re.findall('..?', '123456789')
['12', '34', '56', '78', '9']

You can also do the following, to simplify the regex for longer chunks:

>>> import re
>>> re.findall('.{1,2}', '123456789')
['12', '34', '56', '78', '9']

There is already an inbuilt function in python for this.

>>> from textwrap import wrap
>>> s = '1234567890'
>>> wrap(s, 2)
['12', '34', '56', '78', '90']

This is what the docstring for wrap says:

>>> help(wrap)
'''
Help on function wrap in module textwrap:

wrap(text, width=70, **kwargs)
    Wrap a single paragraph of text, returning a list of wrapped lines.

    Reformat the single paragraph in 'text' so it fits in lines of no
    more than 'width' columns, and return a list of wrapped lines.  By
    default, tabs in 'text' are expanded with string.expandtabs(), and
    all other whitespace characters (including newline) are converted to
    space.  See TextWrapper class for available keyword args to customize
    wrapping behaviour.
'''

Another common way of grouping elements into n-length groups:

>>> s = '1234567890'
>>> map(''.join, zip(*[iter(s)] * 2))
['12', '34', '56', '78', '90']

I think this is shorter and more readable than the itertools version:

def split_by_n(seq, n):
    """Yield successive chunks of ``seq``, each at most ``n`` items long.

    Args:
        seq: A sliceable sequence such as a str, list or tuple.
        n: Chunk length; must be a positive integer.

    Yields:
        The slices ``seq[0:n]``, ``seq[n:2*n]``, ... in order. The final
        chunk is shorter when ``len(seq)`` is not a multiple of ``n``.

    Raises:
        ValueError: If ``n`` is less than 1 (raised when iteration starts).
    """
    if n < 1:
        # A non-positive n never shrinks ``seq``, so the loop below would
        # never terminate.
        raise ValueError('n must be a positive integer')
    while seq:
        yield seq[:n]
        # Drop the chunk just produced; an empty remainder ends the loop.
        seq = seq[n:]

print(list(split_by_n('1234567890', 2)))

Using more-itertools from PyPI:

>>> from more_itertools import sliced
>>> list(sliced('1234567890', 2))
['12', '34', '56', '78', '90']

Suggestion : 2

Last updated: Jun 24, 2022

my_str = 'one-two-three-four'

# Break the string into its '-'-separated parts.
my_list = my_str.split('-')
print(my_list)  # 👉️ ['one', 'two', 'three', 'four']

# Number of leading parts that belong to the first half.
n = 2

# Re-join the parts on either side of the n-th delimiter.
first = '-'.join(my_list[:n])
second = '-'.join(my_list[n:])

print(first)  # 👉️ 'one-two'
print(second)  # 👉️ 'three-four'
my_str = 'one-two-three-four'

# Break the string into its '-'-separated parts.
my_list = my_str.split('-')
print(my_list)  # 👉️ ['one', 'two', 'three', 'four']

# Number of leading parts that belong to the first half.
n = 2

# Show the two slices before they are joined back together.
print(my_list[:n])  # ['one', 'two']
print(my_list[n:])  # ['three', 'four']

first = '-'.join(my_list[:n])
second = '-'.join(my_list[n:])

print(first)  # 👉️ 'one-two'
print(second)  # 👉️ 'three-four'
my_str = 'one-two-three-four'

n = 2

# maxsplit=n stops after n splits, so everything past the n-th '-' stays
# together as the single last element: ['one', 'two', 'three-four'].
parts = my_str.split('-', n)

first = '-'.join(parts[:n])
second = '-'.join(parts[n:])

print(first)  # 👉️ 'one-two'
print(second)  # 👉️ 'three-four'

Suggestion : 3

Last Updated : 05 Jul, 2022

Examples: 

Input: str = "geeksforgeeks"
Output: geeks

Input: str = "aab"
Output: a

Input: str = "aabaabaaba"
Output: aaba

Input: str = "aaaaaaaaaaa"
Output: aaaaa

Input: str = "banana"
Output: an or na

Dynamic Programming: This problem can be solved in $O(n^2)$ time using dynamic programming. The basic idea is to find the longest repeating suffix for all prefixes of the string str.

The length of the longest repeating, non-overlapping substring can be
defined recursively as follows.

LCSRe(i, j) stores the length of the matching, non-overlapping substrings
ending with the i'th and j'th characters.

If str[i - 1] == str[j - 1] && (j - i) > LCSRe(i - 1, j - 1)
    LCSRe(i, j) = LCSRe(i - 1, j - 1) + 1
Else
    LCSRe(i, j) = 0

where i varies from 1 to n and j varies from i + 1 to n.

geeks

geeks

Suggestion : 4

The optional argument "overlaps" determines whether the pattern can match at every position in str (true), or only for unique occurrences of the complete pattern (false). The default is true. If the optional argument overlap is true (default), the returned vector can include overlapping positions. If the optional argument deblank is true, then the spaces will be removed from the end of the character data. Return the vector of all positions in the longer of the two strings s and t where an occurrence of the shorter of the two starts. For example:

quote = ...
   "First things first, but not necessarily in that order";
quote(quote == " ") = "_"
⇒ quote =
   First_things_first,_but_not_necessarily_in_that_order

deblank("    abc  ")
⇒ "    abc"

deblank([" abc   ";
   "   def   "
])
⇒ [" abc  ";
   "   def"]

strtrim("    abc  ")
⇒ "abc"

strtrim([" abc   ";
   "   def   "
])
⇒ ["abc  ";
   "  def"]

findstr("ababab", "a")
⇒ [1, 3, 5];
findstr("abababa", "aba", 0)
⇒ [1, 5]

index("Teststring", "t")
⇒ 4
rindex("Teststring", "t")
⇒ 6

Suggestion : 5

Given two strings source and pattern, you want to find the nth occurrence of pattern in source. Use the find member function to locate successive instances of the substring you are looking for. Example 4-17 contains a simple nthSubstr function. There are also live online events, interactive content, certification prep materials, and more. Get Mark Richards’s Software Architecture Patterns ebook to better understand how to design components—and how they should interact.

#include <string>
#include <iostream>

using namespace std;

// Return the zero-based index of the n-th occurrence of p in s, counting
// occurrences from 1, or -1 if s holds fewer than n occurrences or n < 1.
// Occurrences may overlap: every search resumes one character past the
// start of the previous match.
int nthSubstr(int n, const string& s,
              const string& p) {
   if (n < 1)
      return(-1);   // occurrences are counted from 1

   string::size_type i = s.find(p);     // Find the first occurrence

   for (int j = 1; j < n && i != string::npos; ++j)
      i = s.find(p, i+1); // Find the next occurrence

   // Report "not found" explicitly rather than relying on string::npos
   // happening to narrow to -1 when converted to int.
   if (i == string::npos)
      return(-1);
   return(static_cast<int>(i));
}

int main() {
   const string text = "the wind, the sea, the sky, the trees";
   const string word = "the";

   // Ask for the 1st, 2nd and 5th occurrence of word in text, one per line.
   const int wanted[] = {1, 2, 5};
   for (int k = 0; k < 3; ++k)
      cout << nthSubstr(wanted[k], text, word) << '\n';
}
#include <string>
#include <iostream>

using namespace std;

// Generic variant of nthSubstr that works for any basic_string<T>.
// Returns the zero-based index of the n-th occurrence of p in s, counting
// occurrences from 1, or -1 if there are fewer than n occurrences or n < 1.
//   repeats == false: each search resumes p.length() characters past the
//                     start of the previous match, so matches cannot overlap.
//   repeats == true:  each search resumes one character past the start of
//                     the previous match, so matches may overlap.
template<typename T>
int nthSubstrg(int n, const basic_string<T>& s,
               const basic_string<T>& p,
               bool repeats = false) {
   // Use the size_type of the actual string specialization rather than
   // string::size_type, so positions and npos share one type for every T.
   typedef typename basic_string<T>::size_type size_type;

   if (n < 1)
      return(-1);   // occurrences are counted from 1

   size_type i = s.find(p);
   size_type adv = (repeats) ? 1 : p.length();

   for (int j = 1; j < n && i != basic_string<T>::npos; ++j)
      i = s.find(p, i+adv);

   // Report "not found" explicitly rather than relying on npos happening
   // to narrow to -1 when converted to int.
   if (i == basic_string<T>::npos)
      return(-1);
   return(static_cast<int>(i));
}

int main() {
   const string dna = "AGATGCCATATATATACGATATCCTTA";
   const string motif = "ATAT";

   // Third match when each search resumes past the whole previous match.
   const int disjoint = nthSubstrg(3, dna, motif);
   // Third match when each search resumes just one character later.
   const int overlapping = nthSubstrg(3, dna, motif, true);

   cout << motif << " as non-repeating occurs at " << disjoint << '\n';
   cout << motif << " as repeating occurs at " << overlapping << '\n';
}
1._
#include <string>
#include <iostream>

using namespace std;

// Generic variant of nthSubstr that works for any basic_string<T>.
// Returns the zero-based index of the n-th occurrence of p in s, counting
// occurrences from 1, or -1 if there are fewer than n occurrences or n < 1.
//   repeats == false: each search resumes p.length() characters past the
//                     start of the previous match, so matches cannot overlap.
//   repeats == true:  each search resumes one character past the start of
//                     the previous match, so matches may overlap.
template<typename T>
int nthSubstrg(int n, const basic_string<T>& s,
               const basic_string<T>& p,
               bool repeats = false) {
   // Use the size_type of the actual string specialization rather than
   // string::size_type, so positions and npos share one type for every T.
   typedef typename basic_string<T>::size_type size_type;

   if (n < 1)
      return(-1);   // occurrences are counted from 1

   size_type i = s.find(p);
   size_type adv = (repeats) ? 1 : p.length();

   for (int j = 1; j < n && i != basic_string<T>::npos; ++j)
      i = s.find(p, i+adv);

   // Report "not found" explicitly rather than relying on npos happening
   // to narrow to -1 when converted to int.
   if (i == basic_string<T>::npos)
      return(-1);
   return(static_cast<int>(i));
}

int main() {
   const string dna = "AGATGCCATATATATACGATATCCTTA";
   const string motif = "ATAT";

   // Third match when each search resumes past the whole previous match.
   const int disjoint = nthSubstrg(3, dna, motif);
   // Third match when each search resumes just one character later.
   const int overlapping = nthSubstrg(3, dna, motif, true);

   cout << motif << " as non-repeating occurs at " << disjoint << '\n';
   cout << motif << " as repeating occurs at " << overlapping << '\n';
}
ATAT as non-repeating occurs at 18
ATAT as repeating occurs at 11

Suggestion : 6

C = strsplit(str) splits str at whitespace into C. A whitespace character is equivalent to any sequence in the set {' ','\f','\n','\r','\t','\v'}. If str has consecutive whitespace characters, then strsplit treats them as one whitespace. C = strsplit(str,delimiter) splits str at the delimiters specified by delimiter. Split the same character vector on whitespace and on 'ain', using regular expressions and treating multiple delimiters separately.

str = 'The rain in Spain.';
C = strsplit(str)
C = 1x4 cell
    {'The'}    {'rain'}    {'in'}    {'Spain.'}
data = '1.21, 1.985, 1.955, 2.015, 1.885';
C = strsplit(data, ', ')
C = 1x5 cell
    {'1.21'}    {'1.985'}    {'1.955'}    {'2.015'}    {'1.885'}
data = '1.21m/s1.985m/s 1.955 m/s2.015 m/s 1.885m/s';
[C, matches] = strsplit(data, '\s*m/s\s*', ...
   'DelimiterType', 'RegularExpression')
C = 1x6 cell
    {'1.21'}    {'1.985'}    {'1.955'}    {'2.015'}    {'1.885'}    {0x0 char}