
Split a (character) string into comma (plus a blank) delimited strings based on a change of character (left to right).
Show the output here (use the 1st example below).
Blanks should be treated as any other character (except they are problematic to display clearly). The same applies to commas.
For instance, the string:
gHHH5YY++///\
should be split and show:
g, HHH, 5, YY, ++, ///, \
F split(input, delim) V res = ‘’ L(ch) input I !res.empty & ch != res.last res ‘’= delim res ‘’= ch R resprint(split(‘gHHH5YY++///\’, ‘, ’))
g, HHH, 5, YY, ++, ///, \
org100hjmpdemo;;;Split the string under DE on changing characters,;;;and store the result at HL.split:ldaxd; Load character from stringspcopy:movm,a; Store in outputcpi'$'; CP/M string terminatorrz; Stop when the end is reachedmovb,a; Store previous character in Binxd; Increment input pointerinxh; Increment output pointerldaxd; Get next charactercmpb; Same as previous character?jzspcopy; Then just copy itcpi'$'; Otherwise, if it is the en jzspcopy; Then just copy it as wellmvim,','; Otherwise, add a comma and a spaceinxhmvim,' 'inxhjmpspcopy;;;Demo codedemo:lxid,stringlxih,outcallsplit; Split the stringlxid,outmvic,9; And print it using CP/Mjmp5string:db'gHHH5YY++///',5Ch,'$'out:equ$
g, HHH, 5, YY, ++, ///, \
cpu8086org100hsection.textjmpdemo;;;Split the string at DS:SI on changing characters,;;;and store the result at ES:DI.split:lodsb; Load character.copy:stosb; Store in outputcmpal,'$'; Done yet?je.out; If so, stop.movah,al; Store previous characterlodsb; Get next charactercmpal,ah; Same character?je.copy; Then just copy itcmpal,'$'; End of string?je.copy; Then just copy it toomovdl,almovax,','; Otherwise, add a comma and a spacestoswmoval,dljmp.copy.out:ret;;;Demo codedemo:movsi,stringmovdi,bufcallsplit; Split the stringmovdx,bufmovah,9int21h; And print the result using DOSretsection.datastring:db'gHHH5YY++///\$'section.bssbuf:resb32
g, HHH, 5, YY, ++, ///, \
/* ARM assembly AARCH64 Raspberry PI 3B */
/*  program splitcar64.s   */

/*******************************************/
/* Constants file                          */
/*******************************************/
/* for this file see task include a file in language AArch64 assembly */
.include "../includeConstantesARM64.inc"

/*********************************/
/* Initialized data              */
/*********************************/
.data
szCarriageReturn:   .asciz "\n"
szString1:          .asciz "gHHH5YY++///\\"
/* IMPORTANT REMARK for compiler as
   The way to get special characters into a string is to escape these characters: precede them
   with a backslash ‘\’ character. For example ‘\\’ represents one backslash: the first \ is
   an escape which tells as to interpret the second character literally as a backslash (which
   prevents as from recognizing the second \ as an escape character).
*/

/*********************************/
/* UnInitialized data            */
/*********************************/
.bss
sBuffer:            .skip 100        // output buffer; split does not check this bound

/*********************************/
/*  code section                 */
/*********************************/
.text
.global main
main:                                // entry of program
    ldr x0,qAdrszString1             // input string address
    ldr x1,qAdrsBuffer               // output buffer address
    bl split
    ldr x0,qAdrsBuffer
    bl affichageMess                 // display message
    ldr x0,qAdrszCarriageReturn
    bl affichageMess

100:                                 // standard end of the program
    mov x0,0                         // return code
    mov x8,EXIT                      // request to exit program
    svc 0                            // perform the system call

qAdrszString1:         .quad szString1
qAdrszCarriageReturn:  .quad szCarriageReturn
qAdrsBuffer:           .quad sBuffer

/******************************************************************/
/*     split a string on each change of character                 */
/******************************************************************/
/* x0 contains the address of the zero-terminated input string    */
/* x1 contains the address of the output buffer                   */
/* On return the buffer holds the input with ", " inserted at     */
/* every change of character, followed by a terminating zero.     */
/* x2-x5 are used as scratch registers and are not preserved.     */
split:
    stp x1,lr,[sp,-16]!              // save registers
    mov x4,0                         // index into input string
    mov x5,0                         // index into output buffer
    ldrb w2,[x0,x4]                  // w2 = previous character, seeded with the first one
    cbz x2,4f                        // empty string -> only write the terminator
                                     // The first character is NOT stored here: the loop
                                     // below starts at index 0 and copies it itself.
                                     // Storing it here as well duplicated it ("gg, HHH").
1:
    ldrb w3,[x0,x4]                  // read char[x4] into w3
    cbz x3,4f                        // zero -> end of input
    cmp x2,x3                        // same as previous character ?
    bne 2f
    strb w3,[x1,x5]                  // yes -> just copy it
    b 3f
2:
    mov x2,','                       // no -> store comma in buffer
    strb w2,[x1,x5]
    add x5,x5,1
    mov x2,' '                       // then a space
    strb w2,[x1,x5]
    add x5,x5,1
    strb w3,[x1,x5]                  // then the new character
    mov x2,x3                        // and remember it as the previous character
3:
    add x5,x5,1                      // advance both indices
    add x4,x4,1
    b 1b                             // and loop
4:
    strb wzr,[x1,x5]                 // store final zero (wzr: w3 is not loaded on the empty-string path)
100:
    ldp x1,lr,[sp],16                // restore the 2 saved registers
    ret                              // return to address lr x30

/********************************************************/
/*        File Include functions                        */
/********************************************************/
/* for this file see task include a file in language AArch64 assembly */
.include "../includeARM64.inc"
gg, HHH, 5, YY, ++, ///, \
PROC Split(CHAR ARRAY s) BYTE i CHAR curr,last i=1 last=s(1) Put('") WHILE i<=s(0) DO curr=s(i) IF curr#last THEN Print(", ") FI Put(curr) last=curr i==+1 OD Put('")RETURNPROC Test(CHAR ARRAY s) PrintF("Input: ""%S""%E",s) Print("Split: ") Split(s) PutE() PutE()RETURNPROC Main() Test("gHHH5YY++///\") Test("gHHH 5++,,,///\")RETURNScreenshot from Atari 8-bit computer
Input: "gHHH5YY++///\"Split: "g, HHH, 5, YY, ++, ///, \"Input: "gHHH 5++,,,///\"Split: "g, HHH, , 5, ++, ,,,, ///, \"
withAda.Text_IO;procedureSplitisprocedurePrint_Tokens(s:String)isi,j:Integer:=s'First;beginloopwhilej<=s'Lastand thens(j)=s(i)loopj:=j+1;endloop;ifi/=s'firstthenAda.Text_IO.Put(", ");endif;Ada.Text_IO.Put(s(i..j-1));i:=j;exitwhenj>s'last;endloop;endPrint_Tokens;beginPrint_Tokens("gHHH5YY+++");endsplit;
BEGIN # returns s with ", " added between each change of character # PROC split on characters = ( STRING s )STRING: IF s = "" THEN # empty string # "" ELSE # allow for 3 times as many characters as in the string # # this would handle a string of unique characters # [ 3 * ( ( UPB s - LWB s ) + 1 ) ]CHAR result; INT r pos := LWB result; CHAR s char := s[ LWB s ]; FOR s pos FROM LWB s TO UPB s DO IF s char /= s[ s pos ] THEN # change of character - insert ", " # result[ r pos ] := ","; result[ r pos + 1 ] := " "; r pos +:= 2; s char := s[ s pos ] FI; result[ r pos ] := s[ s pos ]; r pos +:= 1 OD; # return the used portion of the result # result[ 1 : r pos - 1 ] FI ; # split on characters # print( ( split on characters( "gHHH5YY++///\" ), newline ) )END
g, HHH, 5, YY, ++, ///, \
VERSION 1: string
#include<basico.h>#define INICIO 1#define CHARS "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789\"+-/ \\:,;:_*"algoritmoobjetivo="gHHH5YY\"\"++ ///,,,\\",indice=0largo=0,sublargo=0,v=0#( largo = len(indice:=(onechar(CHARS,objetivo))) )t=0,nuevo=""paracadacaracter(v,indice,largo)#(t = replicate(v, sublargo := ((poschar(INICIO, v, objetivo) - 1 ) ) ))#(nuevo = cat( cat(nuevo, t), ", "))objetivo+=sublargosiguientenuevo-=2imprimir("NEW STRING=\n",nuevo,NL)terminar
$ hopper3 basica/splitrep.bas NEW STRING=g, HHH, 5, YY, "", ++, , ///, ,,,, \
VERSION 2: arrays
#include<basico.h>#define INICIO 1#define CHARS "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789\"+-/ \\:,;:_*"algoritmoobjetivo="gHHH5YY\"\"++ ///,,,,\\",indice=0largo=0,sublargo=0,lista={},v=0#( largo = len(indice:=(onechar(CHARS,objetivo))) )paracadacaracter(v,indice,largo)#( replicate(v, sublargo := ((poschar(INICIO, v, objetivo) - 1 ))))meteren(lista)objetivo+=sublargosiguienteimprimir("LISTA=\n",lista,NL)terminar
$ hopper3 basica/splitrep2.bas LISTA=g,HHH,5,YY,"",++, ,///,,,,,,\
REM >splitDECLAREEXTERNALFUNCTIONFN_split$PRINTFN_split$("gHHH5YY++///\")ENDEXTERNALFUNCTIONFN_split$(s$)LETc$=s$(1:1)LETsplit$=""FORi=1TOLEN(s$)LETd$=s$(i:i)IFd$<>c$THENLETsplit$=split$&", "LETc$=d$ENDIFLETsplit$=split$&d$NEXTiLETFN_split$=split$ENDFUNCTION
g, HHH, 5, YY, ++, ///, \
split←2↓∘∊(⊂', '),¨(⊢≠¯1⌽⊢)⊂⊢
split 'gHHH5YY++///\'g, HHH, 5, YY, ++, ///, \
intercalate(", ",¬map(curry(intercalate)'s|λ|(""),¬group("gHHH5YY++///\\")))--> "g, HHH, 5, YY, ++, ///, \\"-- GENERIC FUNCTIONS ------------------------------------------------------------ curry :: (Script|Handler) -> Scriptoncurry(f)scripton|λ|(a)scripton|λ|(b)|λ|(a,b)ofmReturn(f)end|λ|endscriptend|λ|endscriptendcurry-- foldl :: (a -> b -> a) -> a -> [b] -> aonfoldl(f,startValue,xs)tellmReturn(f)setvtostartValuesetlngtolengthofxsrepeatwithifrom1tolngsetvto|λ|(v,itemiofxs,i,xs)endrepeatreturnvendtellendfoldl-- group :: Eq a => [a] -> [[a]]ongroup(xs)scripteqon|λ|(a,b)a=bend|λ|endscriptgroupBy(eq,xs)endgroup-- groupBy :: (a -> a -> Bool) -> [a] -> [[a]]ongroupBy(f,xs)setmftomReturn(f)scriptenGroupon|λ|(a,x)iflengthof(activeofa)>0thensethtoitem1ofactiveofaelsesethtomissing valueendififhis notmissing valueandmf's|λ|(h,x)then{active:(activeofa)&x,sofar:sofarofa}else{active:{x},sofar:(sofarofa)&{activeofa}}endifend|λ|endscriptiflengthofxs>0thentellfoldl(enGroup,{active:{item1ofxs},sofar:{}},tail(xs))iflengthof(itsactive)>0thenitssofar&itsactiveelse{}endifendtellelse{}endifendgroupBy-- intercalate :: Text -> [Text] -> Textonintercalate(strText,lstText)set{dlm,mytext item delimiters}to{mytext item delimiters,strText}setstrJoinedtolstTextastextsetmytext item delimiterstodlmreturnstrJoinedendintercalate-- map :: (a -> b) -> [a] -> [b]onmap(f,xs)tellmReturn(f)setlngtolengthofxssetlstto{}repeatwithifrom1tolngsetendoflstto|λ|(itemiofxs,i,xs)endrepeatreturnlstendtellendmap-- Lift 2nd class handler function into 1st class script wrapper-- mReturn :: Handler -> ScriptonmReturn(f)ifclassoffisscriptthenfelsescriptproperty|λ|:fendscriptendifendmReturn-- tail :: [a] -> [a]ontail(xs)iflengthofxs>1thenitems2thru-1ofxselse{}endifendtail
g, HHH, 5, YY, ++, ///, \
(Also case-sensitive.)
onsplitAtCharacterChanges(input)setlento(countinput)if(len<2)thenreturninputsetchrstoinput'scharacterssetcurrentChrtobeginningofchrsconsideringcaserepeatwithifrom2tolensetthisChrtoitemiofchrsif(thisChris notcurrentChr)thensetitemiofchrsto", "&thisChrsetcurrentChrtothisChrendifendrepeatendconsideringsetastidtoAppleScript'stext item delimiterssetAppleScript'stext item delimitersto""setoutputtochrsastextsetAppleScript'stext item delimiterstoastidreturnoutputendsplitAtCharacterChanges-- Test code:splitAtCharacterChanges("gHHH5YY++///\\")
"g, HHH, 5, YY, ++, ///, \\"useAppleScriptversion"2.4"-- OS X 10.10 (Yosemite) or lateruseframework"Foundation"onsplitAtCharacterChanges(input)tell(current application'sclass"NSMutableString"'sstringWithString:(input))to¬return(itsstringByReplacingOccurrencesOfString:("(.)\\1*+(?!$)")withString:("$0, ")¬options:(current application'sNSRegularExpressionSearch)range:({0,its|length|()}))astextendsplitAtCharacterChanges-- Test code:splitAtCharacterChanges("gHHH5YY++///\\")
"g, HHH, 5, YY, ++, ///, \\"/* ARM assembly Raspberry PI *//* program splitcar.s */ /************************************//* Constantes *//************************************/.equ STDOUT, 1 @ Linux output console.equ EXIT, 1 @ Linux syscall.equ WRITE, 4 @ Linux syscall/*********************************//* Initialized data *//*********************************/.dataszCarriageReturn: .asciz "\n"szString1: .asciz "gHHH5YY++///\\"/* IMPORTANT REMARK for compiler as The way to get special characters into a string is to escape these characters: precede themwith a backslash ‘\’ character. For example ‘\\’ represents one backslash: the first \ isan escape which tells as to interpret the second character literally as a backslash (whichprevents as from recognizing the second \ as an escape character).*//*********************************//* UnInitialized data *//*********************************/.bss sBuffer: .skip 100/*********************************//* code section *//*********************************/.text.global main main: @ entry of program ldr r0,iAdrszString1 @ input string address ldr r1,iAdrsBuffer @ output buffer address bl split ldr r0,iAdrsBuffer bl affichageMess @ display message ldr r0,iAdrszCarriageReturn bl affichageMess 100: @ standard end of the program mov r0, #0 @ return code mov r7, #EXIT @ request to exit program svc #0 @ perform the system call iAdrszString1: .int szString1iAdrszCarriageReturn: .int szCarriageReturniAdrsBuffer: .int sBuffer/******************************************************************//* generate value */ /******************************************************************//* r0 contains the address of input string *//* r1 contains the address of output buffer */split: push {r1-r5,lr} @ save registers mov r4,#0 @ indice loop input string mov r5,#0 @ indice buffer ldrb r2,[r0,r4] @ read first char in reg r2 cmp r2,#0 @ if null -> end beq 3f strb r2,[r1,r5] @ store char in buffer add r5,#1 @ increment location buffer1: ldrb 
r3,[r0,r4] @read char[r4] in reg r3 cmp r3,#0 @ if null end beq 3f cmp r2,r3 @ compare two characters streqb r3,[r1,r5] @ = -> store char in buffer beq 2f @ loop mov r2,#',' @ else store comma in buffer strb r2,[r1,r5] @ store char in buffer add r5,#1 mov r2,#' ' @ and store space in buffer strb r2,[r1,r5] add r5,#1 strb r3,[r1,r5] @ and store input char in buffer mov r2,r3 @ and maj r2 with new char2: add r5,#1 @ increment indices add r4,#1 b 1b @ and loop3: strb r3,[r1,r5] @ store zero final in buffer100: pop {r1-r5,lr} bx lr @ return /******************************************************************//* display text with size calculation */ /******************************************************************//* r0 contains the address of the message */affichageMess: push {r0,r1,r2,r7,lr} @ save registres mov r2,#0 @ counter length 1: @ loop length calculation ldrb r1,[r0,r2] @ read octet start position + index cmp r1,#0 @ if 0 its over addne r2,r2,#1 @ else add 1 in the length bne 1b @ and loop @ so here r2 contains the length of the message mov r1,r0 @ address message in r1 mov r0,#STDOUT @ code to write to the standard output Linux mov r7, #WRITE @ code call system "write" svc #0 @ call systeme pop {r0,r1,r2,r7,lr} @ restaur des 2 registres */ bx lr @ return output : gg, HHH, 5, YY, ++, ///, \parts:[]current:""loopsplit{gHHH5YY++///\}'ch[switchor?empty?currentcontains?currentch->'current++ch['parts++currentcurrent:newch]]'parts++currentprintparts
g HHH 5 YY ++ /// \
Split_Change(str){fori,vinStrSplit(str)res.=(v=prev)?v:(res?", ":"")v,prev:=vreturnres}
Examples:
str:="gHHH5YY++///\"MsgBox%Split_Change(str)
Outputs:
g, HHH, 5, YY, ++, ///, \
Split_Change(str){returnRegExReplace(str,"(.)\1*(?!$)","$0, ")}
Examples:
str:="gHHH5YY++///\"MsgBox%Split_Change(str)
Outputs:
g, HHH, 5, YY, ++, ///, \
# syntax: GAWK -f SPLIT_A_CHARACTER_STRING_BASED_ON_CHANGE_OF_CHARACTER.AWKBEGIN{str="gHHH5YY++///\\"printf("old: %s\n",str)printf("new: %s\n",split_on_change(str))exit(0)}functionsplit_on_change(str,c,i,new_str){new_str=substr(str,1,1)for(i=2;i<=length(str);i++){c=substr(str,i,1)if(substr(str,i-1,1)!=c){new_str=new_str", "}new_str=new_strc}return(new_str)}
old: gHHH5YY++///\new: g, HHH, 5, YY, ++, ///, \
Literal strings in BaCon are passed to the C compiler as they are; a backslash therefore needs to be escaped.
txt$="gHHH5YY++///\\"c$=LEFT$(txt$,1)FORx=1TOLEN(txt$)d$=MID$(txt$,x,1)IFd$<>c$THENPRINT", ";c$=d$ENDIFPRINTd$;NEXT
g, HHH, 5, YY, ++, ///, \
The GW-BASIC solution works without any changes.
10subsplit$(instring$)20iflen(instring$)<2thensplit$=instring$30ret$=left$(instring$,1)40fori=2tolen(instring$)50ifmid$(instring$,i,1)<>mid$(instring$,i-1,1)thenret$=ret$+", "60ret$=ret$+mid$(instring$,i,1)70nexti80split$=ret$90endsub100printsplit$("gHHH5YY++///\")110end
g, HHH, 5, YY, ++, ///, \
10IST$="gHHH5YY++///\"20IFLEN(IST$)<2THENRET$=IST$:GOTO8030RET$=LEFT$(IST$,1)40FORI=2TOLEN(IST$)50IFMID$(IST$,I,1)<>MID$(IST$,I-1,1)THENRET$=RET$+", "60RET$=RET$+MID$(IST$,I,1)70NEXTI80PRINTRET$90END
g, HHH, 5, YY, ++, ///, \
The GW-BASIC solution works without any changes.
DECLAREFUNCTIONsplit$(instring$)PRINTsplit$("gHHH5YY++///\")ENDFUNCTIONsplit$(instring$)IFLEN(instring$)<2THENsplit$=instring$ret$=LEFT$(instring$,1)FORi=2TOLEN(instring$)IFMID$(instring$,i,1)<>MID$(instring$,i-1,1)THENret$=ret$+", "ret$=ret$+MID$(instring$,i,1)NEXTisplit$=ret$ENDFUNCTION
g, HHH, 5, YY, ++, ///, \
The QBasic solution works without any changes.
PRINTsplit$("gHHH5YY++///\")FUNCTIONsplit$(instring$)IFLEN(instring$)<2THENsplit$=instring$ret$=LEFT$(instring$,1)FORi=2TOLEN(instring$)IFMID$(instring$,i,1)<>MID$(instring$,i-1,1)THENret$=ret$+", "ret$=ret$+MID$(instring$,i,1)NEXTisplit$=ret$ENDFUNCTION
g, HHH, 5, YY, ++, ///, \
FUNCTIONsplit$(instring$)IFlen(instring$)<2thenLETsplit$=instring$LETret$=(instring$)[1:1]FORi=2tolen(instring$)IF(instring$)[i:i+1-1]<>(instring$)[i-1:i-1+1-1]thenLETret$=ret$&", "LETret$=ret$&(instring$)[i:i+1-1]NEXTiLETsplit$=ret$ENDFUNCTIONPRINTsplit$("gHHH5YY++///\")END
Same as QBasic entry.
functionsplit$(instring$)iflength(instring$)<2thenreturninstring$ret$=left(instring$,1)fori=2tolength(instring$)ifmid(instring$,i,1)<>mid(instring$,i-1,1)thenret$+=", "ret$+=mid(instring$,i,1)nextireturnret$endfunctionprintsplit$("gHHH5YY++///\")
REM >splitPRINTFN_split("gHHH5YY++///\")ENDDEFFN_split(s$)LOCALc$,split$,d$,i%c$=LEFT$(s$,1)split$=""FORi%=1TOLENs$LETd$=MID$(s$,i%,1)IFd$<>c$THENsplit$+=", "c$=d$ENDIFsplit$+=d$NEXT=split$
g, HHH, 5, YY, ++, ///, \
Split←(+`⊏⊸»⊸≠)⊸⊔Join←{∾⟜𝕨⊸∾´𝕩}", "Join⟜Split"gHHH5YY++///\"
"g, HHH, 5, YY, ++, ///, \"
#include<stdio.h>#include<stdlib.h>#include<string.h>char*split(char*str);intmain(intargc,char**argv){charinput[13]="gHHH5YY++///\\";printf("%s\n",split(input));}char*split(char*str){charlast=*str,*result=malloc(3*strlen(str)),*counter=result;for(char*c=str;*c;c++){if(*c!=last){strcpy(counter,", ");counter+=2;last=*c;}*counter=*c;counter++;}*(counter--)='\0';returnrealloc(result,strlen(result));}
g, HHH, 5, YY, ++, ///, \
#include<stdio.h>voidsplit(constchar*src,char*dst){constchar*src_tmp=src;char*dst_tmp=dst;while(*src_tmp!='\0'){inti=0;// scan for the next change of character occurrencewhile(*(src_tmp+++i)==*src_tmp);// copy the sequence of repeated characters to the destination buffersnprintf(dst_tmp,i+1,"%s",src_tmp);// add a comma or null character (if end of string) to the destination// buffer and advance both the source and destination pointerssnprintf(dst_tmp+=i,3,"%s",*(src_tmp+=i)=='\0'?"\0":", ");dst_tmp+=2;}}intmain(void){constchar*str="gHHH5YY++///\\";charout[100];// must be large enough to hold the resultsplit(str,out);printf("%s\n",out);return0;}
g, HHH, 5, YY, ++, ///, \
using System;
using System.Linq;
using System.Collections.Generic;

public class Program
{
    // Entry point: statements cannot sit directly in a class body, so the
    // demo code is wrapped in Main.
    public static void Main()
    {
        string s = @"gHHH5YY++///\";
        Console.WriteLine(s.RunLengthSplit().Delimit(", "));
    }
}

public static class Extensions
{
    /// <summary>
    /// Lazily splits <paramref name="source"/> into runs of identical
    /// consecutive characters, e.g. "aabccc" yields "aa", "b", "ccc".
    /// An empty string yields no items.
    /// </summary>
    public static IEnumerable<string> RunLengthSplit(this string source)
    {
        using (var enumerator = source.GetEnumerator())
        {
            if (!enumerator.MoveNext()) yield break;

            char previous = enumerator.Current;
            int count = 1;
            while (enumerator.MoveNext())
            {
                if (previous == enumerator.Current)
                {
                    count++;
                }
                else
                {
                    // new string(char, int) builds the run directly.
                    yield return new string(previous, count);
                    previous = enumerator.Current;
                    count = 1;
                }
            }
            // Emit the final run, which no character change follows.
            yield return new string(previous, count);
        }
    }

    /// <summary>
    /// Joins the items of <paramref name="source"/> with
    /// <paramref name="separator"/>; a null separator is treated as empty.
    /// </summary>
    public static string Delimit<T>(this IEnumerable<T> source, string separator = "") =>
        string.Join(separator ?? "", source);
}
g, HHH, 5, YY, ++, ///, \
// Solution for http://rosettacode.org/wiki/Split_a_character_string_based_on_change_of_character#include<string>#include<iostream>autosplit(conststd::string&input,conststd::string&delim){std::stringres;for(autoch:input){if(!res.empty()&&ch!=res.back())res+=delim;res+=ch;}returnres;}intmain(){std::cout<<split("gHHH5 ))YY++,,,///\\",", ")<<std::endl;}
g, HHH, 5, , )), YY, ++, ,,,, ///, \
(defnprint-cchanges[s](println(clojure.string/join", "(map first(re-seq#"(.)\1*"s)))))(print-cchanges"gHHH5YY++///\\")
g, HHH, 5, YY, ++, ///, \
% Split a string based on a change of charactersplit_on_change = iter (s: string) yields (string) part: string := "" for c: char in string$chars(s) do if ~string$empty(part) cand part[string$size(part)] ~= c then yield(part) part := "" end part := part || string$c2s(c) end yield(part)end split_on_change start_up = proc () po: stream := stream$primary_output() str: string := "gHHH5YYY++///\\" % \\ escapes, as in C rslt: string := "" first: bool := true for part: string in split_on_change(str) do if first then first := false else rslt := rslt || ", " end rslt := rslt || part end stream$putl(po, rslt)end start_up
g, HHH, 5, YYY, ++, ///, \
identificationdivision.program-id.split-ch.datadivision.1split-strpic x(30)valuespace.88str-1value"gHHH5YY++///\".88str-2value"gHHH5 ))YY++,,,///\".1binary.2ptrpic 9(4)value1.2str-startpic 9(4)value1.2delim-lenpic 9(4)value1.2split-str-lenpic 9(4)value0.2trash-9pic 9(4)value0.1delim-charpic xvaluespace.1delim-strpic x(6)valuespace.1trash-xpic x.proceduredivision.display"Requested string"setstr-1totrueperformsplit-init-and-godisplayspacedisplay"With spaces and commas"setstr-2totrueperformsplit-init-and-gostoprun.split-init-and-go.move1toptrmove0tosplit-str-lenperformsplit.split.performget-split-str-lendisplaysplit-str(1:split-str-len)performuntilptr>split-str-lenmoveptrtostr-startmovesplit-str(ptr:1)todelim-charunstringsplit-str(1:split-str-len)delimitedalldelim-charintotrash-xdelimiterdelim-strpointerptrend-unstringsubtractstr-startfromptrgivingdelim-lenmovesplit-str(str-start:delim-len)todelim-str(1:delim-len)displaydelim-str(1:delim-len)withnoadvancingifptr<=split-str-lendisplay", "withnoadvancingend-ifend-performdisplayspace.get-split-str-len.inspectfunctionreverse(split-str)tallyingtrash-9forleadingspacesplit-str-lenforcharactersafterspace.endprogramsplit-ch.
Requested stringgHHH5YY++///\g, HHH, 5, YY, ++, ///, \With spaces and commasgHHH5 ))YY++,,,///\g, HHH, 5, , )), YY, ++, ,,,, ///, \
(defunsplit(string)(loop:forprev:=nil:thenc:forc:acrossstring:do(formatt"~:[~;, ~]~c"(andprev(char/=cprev))c)))(split"gHHH5YY++///\\")
g, HHH, 5, YY, ++, ///, \
Doing more work than what's being asked, the following solution builds a list of strings and then outputs it:
(defunsplit(string)(flet((make-buffer()(make-array0:element-type'character:adjustablet:fill-pointert)))(loopwithbuffer=(make-buffer)withresultforprev=nilthencforcacrossstringwhen(andprev(char/=cprev))do(pushbufferresult)(setfbuffer(make-buffer))do(vector-push-extendcbuffer)finally(pushbufferresult)(formatt"~{~A~^, ~}"(nreverseresult)))))(split"gHHH5YY++///\\")
g, HHH, 5, YY, ++, ///, \
include "cowgol.coh";sub split(in: [uint8], buf: [uint8]): (out: [uint8]) is out := buf; loop [buf] := [in]; if [in] == 0 then break; end if; if [in] != [@next in] and [@next in] != 0 then [buf+1] := ','; [buf+2] := ' '; buf := buf+2; end if; buf := buf+1; in := in+1; end loop;end sub;var buf: uint8[32];print(split("gHHH5YY++//\\", &buf[0]));print_nl();g, HHH, 5, YY, ++, //, \
["gHHH5YY++///\\","aaabbbaaabcdeef"].eachdo|s|putssputs" -> "+s.scan(/(.)\1*/).flatten.join(", ")end
gHHH5YY++///\ -> g, HHH, 5, YY, ++, ///, \aaabbbaaabcdeef -> aaa, bbb, aaa, b, c, d, ee, f
importstd.stdio;voidmain(){autosource="gHHH5YY++///\\";charprev=source[0];foreach(ch;source){if(prev!=ch){prev=ch;write(", ");}write(ch);}writeln();}
g, HHH, 5, YY, ++, ///, \
Stringsplit(Stringinput,Stringdelim){Stringres='';for(inti=0;i<input.length;i++){if(res.isNotEmpty&&input[i]!=res[res.length-1]){res+=delim;}res+=input[i];}returnres;}voidmain(){print(split("gHHH5 ))YY++,,,///\\",", "));}
g, HHH, 5, , )), YY, ++, ,,,, ///, \
functionSplitStringCharChange(S:string):string;{Split string whenever the previous char is different from the current one}varI:integer;varC:char;beginResult:='';{Copy string to output}forI:=1toLength(S)dobeginResult:=Result+S[I];{Appended ", " if the next char is different}if(I<Length(S))and(S[I]<>S[I+1])thenResult:=Result+', ';end;end;procedureShowSplitString(Memo:TMemo);constS1='gHHH5YY++///\';varS2:string;beginMemo.Lines.Add(S1);S2:=SplitStringCharChange(S1);Memo.Lines.Add(S2);end;
gHHH5YY++///\g, HHH, 5, YY, ++, ///, \Elapsed Time: 1.767 ms.
func String.SmartSplit() { var c var str = "" var last = this.Length() - 1 for n in 0..last { if c && this[n] != c { str += ", " } c = this[n] str += c } str} print("gHHH5YY++///\\".SmartSplit())g, HHH, 5, YY, ++, ///, \
a$ = "gHHH5YY++///\\"a$[] = strchars a$cp$ = a$[1]for c$ in a$[] if c$ <> cp$ s$ &= ", " cp$ = c$ . s$ &= c$.print s$
g, HHH, 5, YY, ++, ///, \
# by Artyom BologovH# add commans to all sequences of the same character# (effectively splits on character change)g/./s/\(\(.\)\2*\)/\1,/g# Strip off the last commas/, $//,pQ
$ ed -s char-change.in < char-change.ed Newline appendedg, HHH, 5, YY, ++, ///, \
split=fnstr->IO.puts" input string:#{str}"String.graphemes(str)|>Enum.chunk_by(&(&1))|>Enum.map_join(", ",&Enum.join&1)|>fns->IO.puts"output string:#{s}"end.()endsplit.("gHHH5YY++///\\")
input string: gHHH5YY++///\output string: g, HHH, 5, YY, ++, ///, \
Emacs Lisp uses the backslash for an escape character. This causes a problem if the backslash is the last character in a quoted string, because the backslash escapes the end quotation mark, meaning that the string is not terminated.
The solution was to put the input string in a buffer, where no quotes are required. Then read the buffer one character at a time, building up a list of ascii values corresponding to each character. Finally, process the list to show each character the ascii value corresponds to and splitting the output with each change of character.
(defunget-char-values()"List ascii values of chars in buffer named test-string."(let((my-chars)(current-point1))(with-current-buffer"test-string"(while(char-aftercurrent-point)(push(char-aftercurrent-point)my-chars)(setqcurrent-point(1+current-point)))(nreversemy-chars))))(defunshow-chars(ascii-values)"Show characters from VALUES."(let*((first-char(nth0ascii-values))(current-char)(current-position1)(first-elementt)(separator", "))(whenfirst-element(setqfirst-elementnil)(setqprevious-charfirst-char)(insertfirst-char))(while(<current-position(lengthascii-values))(setqcurrent-char(nthcurrent-positionascii-values))(if(equalcurrent-charprevious-char)(insertcurrent-char)(insertseparator)(insertcurrent-char))(setqprevious-charcurrent-char)(setqcurrent-position(1+current-position)))))
(show-chars (get-char-values))
g, HHH, 5, YY, ++, ///, \
openSystem.Text.RegularExpressionsletsplitRunss=Regex("""(.)\1*""").Matches(s)|>Seq.cast<Match>|>Seq.map(funm->m.Value)|>Seq.toListprintfn"%A"(splitRuns"""gHHH5YY++///\""")
["g"; "HHH"; "5"; "YY"; "++"; "///"; "\"]
USE:splitting.monotonic"gHHH5YY++///\\""aaabbccccdeeff"[[=]monotonic-split", "joinprint]bi@
g, HHH, 5, YY, ++, ///, \aaa, bb, cccc, d, ee, ff
CREATEA0,:C@A+A@C@[1CHARS]LA+!;:SPLIT.( c-addr u --)SWAPA!A@C@BEGINOVERWHILEC@A+TUCK<>IF.","THENDUPEMITSWAP1-SWAPREPEATDROP;:TESTOVEROVER."input:"TYPECR."split:"SPLIT.CR;s"gHHH5YY++///\"TESTs"gHHH5 ))YY++,,,///\"TESTBYE
input: gHHH5YY++///\split: g, HHH, 5, YY, ++, ///, \input: gHHH5 ))YY++,,,///\split: g, HHH, 5, , )), YY, ++, ,,,, ///, \
This is F77 style, except for the END SUBROUTINE SPLATTER which would be just END, which for F90 is also allowable outside of the MODULE protocol. Linking the start/stop markers by giving the same name is helpful, especially when the compiler checks for this. The $ symbol at the end of a FORMAT code sequence is a common F77 extension, meaning "do not finish the line" so that a later output will follow on. This is acceptable to F90 and is less blather than adding the term ,ADVANCE = "NO" inside a WRITE statement that would otherwise be required. Output is to I/O unit 6 which is the modern default for "standard output". The format code is A meaning "any number of characters" rather than A1 for "one character" so as to accommodate not just the single character from TEXT but also the two characters of ", " for the splitter between sequences. Alas, there is no provision to change fount or colour for this, to facilitate the reader's attempts to parse the resulting list especially when the text includes commas or spaces of its own. By contrast, with quoted strings, the standard protocol is to double contained quotes.
An alternative method would be to prepare the entire output in a CHARACTER variable then write that, but this means answering the maddening question "how long is a piece of string?" for that variable, though later Fortran has arrangements whereby a text variable is resized to suit on every assignment, as in TEMP = TEMP // more - but this means repeatedly copying the text to the new manifestation of the variable. Still another approach would be to prepare an array of fingers to each split point (as in Phrase_reversals#Fortran) so that the final output would be a single WRITE using that array, and again, how big must the array be? At most, as big as the number of characters in TEXT. With F90, subroutines can declare arrays of a size determined on entry, with something like INTEGER A(LEN(TEXT))
If the problem were to be solved by writing a "main line" only, there would have to be a declaration of the text variable there but since a subroutine can receive a CHARACTER variable of any size (the actual size is passed as a secret parameter), this can be dodged.
For this example a DO-loop stepping along the text is convenient, but in a larger context it would probably be most useful to work along the text with fingers L1 and L2 marking the start and finish positions of each sequence.
SUBROUTINESPLATTER(TEXT)!Print a comma-separated list. Repeated characters constitute one item.Can'tdisplaytheinsertedcommasinadifferentcoloursoasnottolooklikeanycommasinTEXT.CHARACTER*(*)TEXT!The text.INTEGERL!A finger.CHARACTER*1C!A state follower.IF(LEN(TEXT).LE.0)RETURN!Prevent surprises in the following..C=TEXT(1:1)!Syncopation: what went before.DOL=1,LEN(TEXT)!Step through the text.IF(C.NE.TEXT(L:L))THEN!A change of character?C=TEXT(L:L)!Yes. This is the new normal.WRITE(6,1)", "!Set off from what went before. This is not from TEXT.END IF!So much for changes.WRITE(6,1)C!Roll the current character. (=TEXT(L:L))1FORMAT(A,$)!The $ sez: do not end the line.END DO!On to the next character.WRITE(6,1)!Thus end the line. No output item means that the $ is not reached, so the line is ended.END SUBROUTINESPLATTER!TEXT with spaces, or worse, commas, will produce an odd-looking list.PROGRAMPOKECALLSPLATTER("gHHH5YY++///\")!The example given.END
Unfortunately, the syntax highlighter has failed to notice the terminating quote character, presumably because the preceding backslash might be an "escape sequence" trigger, a facility not used in Fortran text literals except possibly as a later modernist option.
g, HHH, 5, YY, ++, ///, \
functionsplit(instringasstring)asstringiflen(instring)<2thenreturninstringdimasstringret=left(instring,1)foriasuinteger=2tolen(instring)ifmid(instring,i,1)<>mid(instring,i-1,1)thenret+=", "ret+=mid(instring,i,1)nextireturnretendfunction
s = "gHHH5YY++///\\"println[join[", ", map[getFunction["first", 1], s =~ %r/((.)\2*)/g]]]
g, HHH, 5, YY, ++, ///, \
FB can process either Pascal strings (slowly being deprecated), or Apple's Core Foundation CFStrings (and Objective-C NSStrings). Here's the old-school Pascal string function:
local fn SplitString( inputStr as Str255 ) as Str255Str255 resultStrNSUInteger iif len$( inputStr ) < 2 then resultStr = inputStr : exit fnresultStr = left$( inputStr, 1 )for i = 2 to len$( inputStr ) if mid$( inputStr, i, 1 ) <> mid$( inputStr, i - 1, 1 ) then resultStr = resultStr + ", " resultStr = resultStr + mid$(inputStr, i, 1)nextend fn = resultStrwindow 1print fn SplitString( "gHHH5YY++///\" )HandleEvents
And here's the recommended CFString counterpart:
local fn SplitString( inputStr as CFStringRef ) as CFStringRef NSUInteger i unichar chr, lastChr = fn StringCharacterAtIndex( inputStr, 0 ) CFMutableStringRef resultStr = fn MutableStringWithCapacity(0) for i = 0 to len( inputStr ) - 1 chr = fn StringCharacterAtIndex( inputStr, i ) if ( chr != lastChr ) then MutableStringAppendString( resultStr, @", " ) MutableStringAppendString( resultStr, mid( inputStr, i, 1 ) ) lastChr = chr nextend fn = resultStrwindow 1print fn SplitString( @"gHHH5YY++///\\" )HandleEvents
Output for either function:
g, HHH, 5, YY, ++, ///, \
Treating "character" as a byte:
packagemainimport("fmt""strings")funcmain(){fmt.Println(scc(`gHHH5YY++///\`))}funcscc(sstring)string{iflen(s)<2{returns}varbstrings.Builderp:=s[0]b.WriteByte(p)for_,c:=range[]byte(s[1:]){ifc!=p{b.WriteString(", ")}b.WriteByte(c)p=c}returnb.String()}
g, HHH, 5, YY, ++, ///, \
importData.List(group,intercalate)main::IO()main=putStrLn$intercalate", "(group"gHHH5YY++///\\")
g, HHH, 5, YY, ++, ///, \
or as a hand-written fold:
importData.List(intercalate)importData.Bool(bool)charGroups::String->[String]charGroups=letgo(a,b)(s,groups)|a==b=(b:s,groups)|otherwise=([a],bools[b](nulls):groups)inuncurry(:).foldrgo([],[]).(zip<*>tail)main::IO()main=putStrLn$intercalate", "$charGroups"gHHH5YY++///\\"
g, HHH, 5, YY, ++, ///, \
or in terms of span:
importData.List(intercalate)charGroups::String->[String]charGroups[]=[]charGroups(c:cs)=let(xs,ys)=span(c==)csin(c:xs):charGroupsysmain::IO()main=putStrLn$intercalate", "$charGroups"gHHH5YY++///\\"
g, HHH, 5, YY, ++, ///, \
100 LET S$="gHHH5YY++///\"110 PRINT S$(1);120 FOR I=2 TO LEN(S$)130 IF S$(I)<>S$(I-1) THEN PRINT ", ";140 PRINT S$(I);150 NEXT160 PRINT
Solution:
splitChars=:(1,~2~:/\])<;.2]delimitChars=:', 'joinstringsplitChars
Example Usage:
delimitChars'gHHH5YY++///\'g,HHH,5,YY,++,///,\
You can use a regular expression to capture every character preceded by 0 or more of itself.
importjava.util.regex.Matcher;importjava.util.regex.Pattern;
Stringsplit(Stringstring){Patternpattern=Pattern.compile("(.)\\1*");Matchermatcher=pattern.matcher(string);StringBuilderstrings=newStringBuilder();intindex=0;while(matcher.find()){if(index++!=0)strings.append(", ");strings.append(matcher.group());}returnstrings.toString();}
g, HHH, 5, YY, ++, ///, \
An alternate demonstration
packageorg.rosettacode;importjava.util.ArrayList;importjava.util.List;/** * This class provides a main method that will, for each arg provided, * transform a String into a list of sub-strings, where each contiguous * series of characters is made into a String, then the next, and so on, * and then it will output them all separated by a comma and a space. */publicclassSplitStringByCharacterChange{publicstaticvoidmain(String...args){for(Stringstring:args){List<String>resultStrings=splitStringByCharacter(string);Stringoutput=formatList(resultStrings);System.out.println(output);}}/** * @param string String - String to split * @return List<\String> - substrings of contiguous characters */publicstaticList<String>splitStringByCharacter(Stringstring){List<String>resultStrings=newArrayList<>();StringBuildercurrentString=newStringBuilder();for(intpointer=0;pointer<string.length();pointer++){currentString.append(string.charAt(pointer));if(pointer==string.length()-1||currentString.charAt(0)!=string.charAt(pointer+1)){resultStrings.add(currentString.toString());currentString=newStringBuilder();}}returnresultStrings;}/** * @param list List<\String> - list of strings to format as a comma+space-delimited string * @return String */publicstaticStringformatList(List<String>list){StringBuilderoutput=newStringBuilder();for(intpointer=0;pointer<list.size();pointer++){output.append(list.get(pointer));if(pointer!=list.size()-1){output.append(", ");}}returnoutput.toString();}}
g, HHH, 5, YY, ++, ///, \
(()=>{"use strict";// ----------- SPLIT ON CHARACTER CHANGES ------------constmain=()=>group("gHHH5YY++///\\").map(x=>x.join("")).join(", ");// --------------------- GENERIC ---------------------// group :: [a] -> [[a]]constgroup=xs=>// A list of lists, each containing only// elements equal under (===), such that the// concatenation of these lists is xs.groupBy(a=>b=>a===b)(xs);// groupBy :: (a -> a -> Bool) [a] -> [[a]]constgroupBy=eqOp=>// A list of lists, each containing only elements// equal under the given equality operator,// such that the concatenation of these lists is xs.xs=>0<xs.length?(()=>{const[h,...t]=xs;const[groups,g]=t.reduce(([gs,a],x)=>eqOp(x)(a[0])?(Tuple(gs)([...a,x])):Tuple([...gs,a])([x]),Tuple([])([h]));return[...groups,g];})():[];// Tuple (,) :: a -> b -> (a, b)constTuple=a=>b=>({type:"Tuple","0":a,"1":b,length:2,*[Symbol.iterator](){for(constkinthis){if(!isNaN(k)){yieldthis[k];}}}});// MAIN ---returnmain();})();
g, HHH, 5, YY, ++, ///, \
Or, in terms of a general `span` function:
(()=>{"use strict";// -------- STRING SPLIT ON CHARACTER CHANGES --------// charGroups :: String -> [String]constcharGroups=s=>// The characters of s split at each point where// consecutive characters differ.0<s.length?(()=>{constc=s[0],[xs,ys]=span(x=>c===x)([...s.slice(1)]);return[[c,...xs],...charGroups(ys)].map(zs=>[...zs].join(""));})():"";// ---------------------- TEST -----------------------// main :: IO()constmain=()=>charGroups("gHHH5YY++///\\").join(", ");// --------------------- GENERIC ---------------------// span :: (a -> Bool) -> [a] -> ([a], [a])constspan=p=>// Longest prefix of xs consisting of elements which// all satisfy p, tupled with the remainder of xs.xs=>{consti=xs.findIndex(x=>!p(x));return-1!==i?[xs.slice(0,i),xs.slice(i)]:[xs,[]];};// MAIN ---returnmain();})();
g, HHH, 5, YY, ++, ///, \
# input: a string# output: a stream of runsdef runs: def init: explode as $s | $s[0] as $i | (1 | until( $s[.] != $i; .+1)); if length == 0 then empty elif length == 1 then . else init as $n | .[0:$n], (.[$n:] | runs) end;"gHHH5YY++///\\" | [runs] | join(", ")Using the -r ("raw output") command-line option of jq:
g, HHH, 5, YY, ++, ///, \
Showing off a little unit testing...
Starting with
#!/usr/bin/env jsish;'Split a string based on change of character, in Jsish';functionsplitOnChange(str:string):string{if(str.length<2)returnstr;varlast=str[0];varresult=last;for(varpos=1;pos<str.length;pos++){result+=((last==str[pos])?last:', '+str[pos]);last=str[pos];}returnresult;}provide('splitOnChange',1.0);/* literal backslash needs escaping during initial processing */;splitOnChange('gHHH5YY++///\\');;splitOnChange('a');;splitOnChange('ab');;splitOnChange('aaa');;splitOnChange('aaaba');;splitOnChange('gH HH5YY++//,/\\');
Then
prompt$ jsish -u -update true splitOnChange.jsiCreated splitOnChange.jsi
Giving
#!/usr/bin/env jsish;'Split a string based on change of character, in Jsish';functionsplitOnChange(str:string):string{if(str.length<2)returnstr;varlast=str[0];varresult=last;for(varpos=1;pos<str.length;pos++){(last==str[pos])?result+=last:result+=', '+str[pos];last=str[pos];}returnresult;}provide('splitOnChange',1.0);/* literal backslash needs escaping during initial processing */;splitOnChange('gHHH5YY++///\\');;splitOnChange('a');;splitOnChange('ab');;splitOnChange('aaa');;splitOnChange('aaaba');;splitOnChange('gH HH5YY++//,/\\');/*=!EXPECTSTART!='Split a string based on change of character, in Jsish'splitOnChange('gHHH5YY++///\') ==> g, HHH, 5, YY, ++, ///, \splitOnChange('a') ==> asplitOnChange('ab') ==> a, bsplitOnChange('aaa') ==> aaasplitOnChange('aaaba') ==> aaa, b, asplitOnChange('gH HH5YY++//,/\') ==> g, H, , HH, 5, YY, ++, //, ,, /, \=!EXPECTEND!=*/
Which tests as:
prompt$ jsish -u splitOnChange.jsi[PASS] splitOnChange.jsi
And then satisfying the task of showing the one result, using the script as a module:
prompt$ jsishJsish interactive: see 'help [cmd]'. \ cancels > input. ctrl-c aborts running script.# require('splitOnChange');1# puts(splitOnChange('gHHH5YY++///\\'));g, HHH, 5, YY, ++, ///, \# v0.6usingIterToolsstr="gHHH5YY++///\\"sep=map(join,groupby(identity,str))println("string:$str\nseparated: ",join(sep,", "))
string: gHHH5YY++///\separated: g, HHH, 5, YY, ++, ///, \
split:{(&~=':x)_x}","/split"gHHH5YY++///\\"
"g,HHH,5,YY,++,///,\\"
// version 1.0.6

/**
 * Returns [s] with ", " inserted wherever a character differs from the
 * character before it. Strings shorter than two characters are returned
 * unchanged.
 */
fun splitOnChange(s: String): String {
    if (s.length < 2) return s
    val out = StringBuilder().append(s[0])
    for (i in 1 until s.length) {
        // The last character written is always s[i - 1], so comparing
        // neighbours in the input detects the same change points.
        if (s[i] != s[i - 1]) out.append(", ")
        out.append(s[i])
    }
    return out.toString()
}

fun main(args: Array<String>) {
    val s = """gHHH5YY++///\"""
    println(splitOnChange(s))
}
g, HHH, 5, YY, ++, ///, \
funsplitOnChange(src:String):String=src.fold(""){acc,c->if(acc.isEmpty()||acc.last()==c)"$acc$c"else"$acc,$c"}funmain(){splitOnChange("""gHHH5YY++///\""").also { println(it)}}
g, HHH, 5, YY, ++, ///, \
{defmysplit{defmysplit.r{lambda{:w:i}{if{>:i{W.length:w}}thenelse{if{not{W.equal?{W.get:i:w}{W.get{+:i1}:w}}}then____else}{W.get{+:i1}:w}{mysplit.r:w{+:i1}}}}}{lambda{:w}{S.replace____byin{mysplit.r#:w0}}}}->mysplit{mysplitgHHH5YY++///\}->gHHH5YY++///\
Note that the backslash must be quoted as a double backslash as Lua uses C-like escape sequences.
-- Return inStr with ", " inserted wherever a character differs from the
-- character before it. An empty string yields an empty string.
function charSplit (inStr)
    local pieces = { inStr:sub(1, 1) }
    for pos = 2, #inStr do
        local current = inStr:sub(pos, pos)
        -- A change of character starts a new group.
        if current ~= inStr:sub(pos - 1, pos - 1) then
            pieces[#pieces + 1] = ", "
        end
        pieces[#pieces + 1] = current
    end
    return table.concat(pieces)
end

print(charSplit("gHHH5YY++///\\"))
g, HHH, 5, YY, ++, ///, \
Alternative: simply scan for differences in reverse order and insert the delimiter in place; the loop counter i is not affected by the changing length of s.
functionsplitdiff(s)fori=#s,2,-1doifs:sub(i,i)~=s:sub(i-1,i-1)thens=s:sub(1,i-1)..', '..s:sub(i,-1)endendreturnsend
#!/bin/ksh# Split a character string based on change of character## Variables:#str='gHHH5YY++///\'delim=', '## Functions:### Function _splitonchg(str, delim) - return str split by delim at char change#function_splitonchg{typeset_str;_str="$1"typeset_delim;_delim="$2"typeset_i_splitstr;integer_ifor((_i=1;_i<${#_str}+1;_i++));doif[["${_str:$((_i-1)):1}"!="${_str:${_i}:1}"]];then_splitstr+="${_str:$((_i-1)):1}${_delim}"else_splitstr+="${_str:$((_i-1)):1}"fidoneecho"${_splitstr%"${_delim}"*}"}####### main #######print"Original:${str}"print" Split:$(_splitonchg"${str}""${delim}")"
Original: gHHH5YY++///\
Split: g, HHH, 5, YY, ++, ///, \
Stack New opens a new stack object as the current stack and keeps the old one. After the block finishes executing, the old stack becomes the current stack again. The Data statement pushes to the bottom (we read from the top, so using Data we get FIFO behaviour). Letter$ pops a string, or raises an error if no string is found at the top of the stack.
ModulePrintParts(splitthis$){Defstringm$,p$DeflongcStackNew{iflen(splitthis$)=0thenexitFori=1tolen(splitthis$)p$=mid$(splitthis$,i,1)ifm$<>p$then{ifc>0thendatastring$(m$,c)m$=p$c=1}elsec++Nextiifc>0thendatastring$(m$,c)Whilestack.size>1{Printletter$+", ";}IfnotemptythenPrintletter$}}PrintParts"gHHH5YY++///\"' version 13 of M2000 Interpreter - old program run as is.' [] get the current stack as object and leave an empty stack as current stack,' array([]) empty the stack object (a linked linst) making a tuple (of variant type).' #str$(", ") extract from array all items render as string and place between the ", " string - or a space by default, without argument.MODULEPrintParts(splitthisASSTRING){IFLEN(splitthis)=0THENPRINT:EXITSTRINGm,pLONGc,iSTACKNEW{FORi=1TOLEN(splitthis)p=mid$(splitthis,i,1)IFm<>pTHENIFc>0THENDATASTRING$(m,c)m=p:c=1ELSEc++ENDIFNEXTIFc>0THENDATASTRING$(m,c)PRINTARRAY([])#STR$(", ")}}PrintParts"gHHH5YY++///\"
Added an additional backslash to escape the \ character at the end.
splitChange:=proc(str::string)localstart,i,len;start:=1;len:=StringTools:-Length(str);forifrom2tolendoifstr[i]<>str[start]thenprintf("%s, ",str[start..i-1]);start:=i:endif;enddo;printf("%s",str[start..len]);endproc;splitChange("gHHH5YY++///\\");
g, HHH, 5, YY, ++, ///, \
The backslash (\) must be escaped with another backslash when defining the string.
StringJoin@@Riffle[StringCases["gHHH5YY++///\\",p:(x_)..->p],", "]
g, HHH, 5, YY, ++, ///, \
s="gHHH5YY++///\"output=[]lastLetter=s[0]forletterinsifletter!=lastLetterthenoutput.push", "output.pushletterlastLetter=letterendforprintoutput.join("")
g, HHH, 5, YY, ++, ///, \
MODULECharacterChange;FROMTerminalIMPORTWrite,WriteString,WriteLn,ReadChar;PROCEDURESplit(str:ARRAYOFCHAR);VARi:CARDINAL;c:CHAR;BEGINFORi:=0TOHIGH(str)DOIFi=0THENc:=str[i]ELSIFstr[i]#cTHENc:=str[i];WriteLn;END;Write(c)ENDENDSplit;CONSTEX="gHHH5YY++///\";BEGINSplit(EX);ReadCharENDCharacterChange.
gHHH5YY++///\
Let's create a general-purpose routine that can be used for this problem.
(define(monotonic-sliceslstkey-func(cmp=))(let(result'()tmp'()old-key0new-key0)(dolist(xlst)(set'new-key(key-funcx))(cond((empty?tmp)(pushxtmp))((cmpnew-keyold-key)(pushxtmp))(true(push(reversetmp)result)(set'tmp(listx))))(set'old-keynew-key))(unless(empty?tmp)(push(reversetmp)result))(reverseresult)))(monotonic-slices'(023457)odd?)===>((02)(3)(4)(57))(monotonic-slices'(02334879)or>)===>((023)(348)(79))
So the solution is simply:
(define(monotonic-substrstr(key-funcor)(cmp=))(let(expl(explodestr))(mapjoin(monotonic-slicesexplkey-funccmp))))(define(foostr)(join(monotonic-substrstr)", "))(println(foo"gHHH5YY++///\\"))g,HHH,5,YY,++,///,\
Substrings with ascending characters:
(monotonic-substr"abccty-2890-box"or>)("abc""cty""-289""0""-box")
Each substring must consist entirely of vowels or non-vowels.
(monotonic-substr"cooling glee"(fn(c)(true?(findc"aeiou"))))("c""oo""l""i""ng gl""ee")
procsplitOnDiff(str:string):string=result=""ifstr.len<1:returnresultvarprevChar:char=str[0]foridxin0..<str.len:ifstr[idx]!=prevChar:result&=", "prevChar=str[idx]result&=str[idx]assertsplitOnDiff("""X""")=="""X"""assertsplitOnDiff("""XX""")=="""XX"""assertsplitOnDiff("""XY""")=="""X, Y"""assertsplitOnDiff("""gHHH5YY++///\""")=="""g, HHH, 5, YY, ++, ///, \"""echosplitOnDiff("""gHHH5YY++///\""")
g, HHH, 5, YY, ++, ///, \
'gHHH5YY++///\' | str replace -a -r '(?s)(?<=(.))(?!\1)(?=.)' ', '
Or, doing a real split in between:
'gHHH5YY++///\' | parse -r '(?s)\G((.)\2*+)' | get capture0 | str join ', '
g, HHH, 5, YY, ++, ///, \
/* Split a string into runs of identical characters and show the runs  */
/* separated by ', '.                                                  */
Parse Arg str .                       /*obtain optional arguments from the CL*/
If str=='' Then str='gHHH5YY++///\'   /*Not specified? Then use the default.*/
i=1                                   /* start position of the current run   */
ol=''                                 /* output line built so far            */
Do Forever
  /* Find the first character at or after position i that differs from */
  /* the character at i (0 if there is none). No length limit is given:*/
  /* a limit would make a long run look like the end of the string and */
  /* the rest of the text would then be emitted without being split.   */
  j=verify(str,substr(str,i,1),'N',i)
  If j=0 Then Do                      /* End of string reached               */
    ol=ol||substr(str,i)              /* the final substring                 */
    Leave
  End
  ol=ol||substr(str,i,j-i)', '        /* add substring and delimiter         */
  i=j                                 /* the next run starts at the change   */
End
Say ol
g, HHH, 5, YY, ++, ///, \
programSplitChars;{$IFDEF FPC}{$MODE DELPHI}{$COPERATORS ON}{$ENDIF}constTestString='gHHH5YY++///\';functionSplitAtChars(constS:String):String;vari:integer;lastChar:Char;beginresult:='';IFlength(s)>0thenbeginLastChar:=s[1];result:=LastChar;Fori:=2tolength(s)dobeginifs[i]<>lastCharthenbeginlastChar:=s[i];result+=', ';end;result+=LastChar;end;end;end;BEGINwriteln(SplitAtChars(TestString));end.
g, HHH, 5, YY, ++, ///, \
##vars:='gHHH5YY++///\';s.AdjacentGroup.Select(a->a.JoinToString).JoinToString(', ').Print
g, HHH, 5, YY, ++, ///, \
usestrict;usewarnings;usefeature'say';useutf8;binmode(STDOUT,':utf8');formy$string(q[gHHH5YY++///\\],q[fffn⃗n⃗n⃗»»» ℵℵ☄☄☃☃̂☃🤔🇺🇸🤦♂️👨👩👧👦]){my@S;my$last='';while($string=~ /(\X)/g){if($lasteq$1){$S[-1].=$1}else{push@S,$1}$last=$1;}say"Orginal: $string\n Split: 「".join('」, 「',@S)."」\n";}
Orginal: gHHH5YY++///\ Split: 「g」, 「HHH」, 「5」, 「YY」, 「++」, 「///」, 「\」Orginal: fffn⃗n⃗n⃗»»» ℵℵ☄☄☃☃̂☃🤔🇺🇸🤦♂️👨👩👧👦 Split: 「fff」, 「」, 「n⃗n⃗n⃗」, 「»»»」, 「 」, 「ℵℵ」, 「☄☄」, 「☃」, 「☃̂」, 「☃」, 「🤔」, 「🇺🇸」, 「🤦♂️」, 「👨👩👧👦」
functionsplit_on_change(strings)stringres=""iflength(s)thenintegerprev=s[1]fori=1tolength(s)dointegerch=s[i]ifch!=prevthenres&=", "prev=chendifres&=chendforendifreturnresendfunctionputs(1,split_on_change(`gHHH5YY++///\`))
g, HHH, 5, YY, ++, ///, \
/# Rosetta Code problem: https://rosettacode.org/wiki/Split_a_character_string_based_on_change_of_characterby Galileo, 11/2022 #/include ..\Utilitys.pmt"""gHHH5YY++///\" 1 get >pslen for get dup tps == if rot swap chain swap else ps> drop >ps swap ", " tps chain chain swap endifendforpstack
["g, HHH, 5, YY, ++, ///, \", "gHHH5YY++///\"]=== Press any key to exit ===
(de splitme (Str) (let (Str (chop Str) Fin) (glue ", " (make (for X Str (if (= X (car Fin)) (conc Fin (cons X)) (link (setq Fin (cons X))) ) ) ) ) ) )(prinl (splitme "gHHH5YY++///\\"))
g, HHH, 5, YY, ++, ///, \
stringinput="gHHH5YY++///\\";// \ needs escapingstringlast_char;foreach(input/1,stringchar){if(last_char&&char!=last_char)write(", ");write(char);last_char=char;}
g, HHH, 5, YY, ++, ///, \
To make sense of this example, you must understand riders. A rider is a simple abstraction for efficiently parsing strings. A rider is a record with an original substring, a source substring, and a token substring.
After executing the following code, for example:
Put "abcdef" into a string.Slap a rider on the string.
The rider looks like this:
Original: "abcdef"Source: "abcdef"Token: ""
Now when we `Bump the rider.`, it looks like this:
Original: "abcdef"Source: "bcdef"Token: "a"
Another bump, and:
Original: "abcdef"Source: "cdef"Token: "ab"
Now let's say we have a complete token and want to start a new one. We can `Position the rider's token on the rider's source.` and now the rider looks like this:
Original: "abcdef"Source: "cdef"Token: ""
And that's all there is to it.
To run:Start up.Split "gHHH5YY++///\" into some string things by change of character.Write the string things on the console.Destroy the string things.Wait for the escape key.Shut down.To split a string into some string things by change of character:If the string's length is less than 2, add the string to the string things; exit.Slap a rider on the string.Loop.Move the rider (change of character rules).Add the rider's token to the string things.If the rider's source is blank, exit.Repeat.To move a rider (change of character rules):Position the rider's token on the rider's source.Loop.If the rider's source is blank, exit.If the rider's token is blank, bump the rider; repeat.Put the rider's token's last plus 1 into a byte pointer.If the rider's token's last's target is not the byte pointer's target, exit.Bump the rider.Repeat.To write some string things to a console;To write some string things on a console:Get a string thing from the string things.Loop.If the string thing is nil, write "" on the console; exit.Write the string thing's string on the console without advancing.If the string thing's next is not nil, write ", " on the console without advancing.Put the string thing's next into the string thing.Repeat.
g, HHH, 5, YY, ++, ///, \
<#
.SYNOPSIS
    Splits a string into runs of identical characters joined by ", ".
.PARAMETER String
    The text to split.
.OUTPUTS
    System.String. The runs of the input, in order, separated by ", ".
    An empty input yields an empty string.
#>
function Split-String ([string]$String)
{
    # Substring(0,1) throws on an empty string, so handle that case first.
    if ($String.Length -eq 0)
    {
        return ""
    }

    [string]$c = $String.Substring(0,1)
    [string]$splitString = $c

    for ($i = 1; $i -lt $String.Length; $i++)
    {
        [string]$d = $String.Substring($i,1)

        # -cne compares case-sensitively. Plain -ne ignores case for
        # strings, which would treat "h" and "H" as the same character.
        if ($d -cne $c)
        {
            $splitString += ", "
            $c = $d
        }

        $splitString += $d
    }

    $splitString
}
Split-String"gHHH5YY++///\"
g, HHH, 5, YY, ++, ///, \
Proceduresplitstring(s$)Define*p.Character=@s$,c_buf.c=*p\cWhile*p\cIf*p\c=c_bufPrint(Chr(c_buf))ElsePrint(", ")c_buf=*p\cContinueEndIf*p+SizeOf(Character)WendEndProcedureIfOpenConsole()splitstring("gHHH5YY++///\")Input()EndIf
g, HHH, 5, YY, ++, ///, \
Using [itertools.groupby].
from itertools import groupby


def splitter(text):
    """Return the runs of identical characters in ``text`` joined by ', '.

    An empty ``text`` yields an empty string.
    """
    runs = []
    for _, members in groupby(text):
        # Each group holds one run of consecutive equal characters.
        runs.append(''.join(members))
    return ', '.join(runs)


if __name__ == '__main__':
    txt = 'gHHH5YY++///\\'     # Note backslash is the Python escape char.
    print(f'Input: {txt}\nSplit: {splitter(txt)}')
Input: gHHH5YY++///\Split: g, HHH, 5, YY, ++, ///, \
def splitterz(text):
    """Return ``text`` with ', ' inserted at every change of character.

    Args:
        text: The string to split.

    Returns:
        The runs of identical characters in ``text``, in order, joined by
        ', '. An empty ``text`` yields an empty string.
    """
    if not text:
        # text[-1] below would raise IndexError on an empty string.
        return ''
    # Pair every character with its successor. The last character is paired
    # with itself so that no separator is emitted after the final run.
    return ''.join(x + ('' if x == nxt else ', ')
                   for x, nxt in zip(text, text[1:] + text[-1]))


if __name__ == '__main__':
    txt = 'gHHH5YY++///\\'
    print(splitterz(txt))
g, HHH, 5, YY, ++, ///, \
import itertools

# Python 2/3 compatibility: rebind input to raw_input where raw_input exists.
try:
    input = raw_input
except NameError:
    # raw_input is not defined here; keep the built-in input. Only NameError
    # is expected, so no other exception is swallowed.
    pass

# Read one line and collect its runs of identical characters.
s = input()
groups = []
for _, g in itertools.groupby(s):
    groups.append(''.join(g))

print(' input string: %s' % s)
print(' output string: %s' % ', '.join(groups))
when using the default input
input string: gHHH5YY++///\ output string: g, HHH, 5, YY, ++, ///, \
[ dup size 2 < iff size done behead swap [] nested join witheach [ over != if [ drop i^ 1+ conclude ] ] ] is $run ( $ --> n )[ dup size 2 < if done dup $run split dup [] = iff drop done dip [ $ ", " join ] recurse join ] is runs$ ( $ --> $ )
Testing in Quackery shell.
/O> $ "gHHH5YY++///\" runs$ echo$... g, HHH, 5, YY, ++, ///, \Stack empty.
The "\" needs to be escaped for the string to be valid.
split_by_char<-function(s){chars<-strsplit(s,"")|>unlist()|>rle()chunks<-strrep(chars$values,chars$lengths)cat(chunks,sep=", ")}split_by_char("gHHH5YY++///\\ ,,")
g, HHH, 5, YY, ++, ///, \, , ,,
#langracket(define(split-strings-on-changes)(maplist->string(group-byvalues(string->lists)char=?)))(displayln(string-join(split-strings-on-change#<<<gHHH5YY++///\<)", "))
g, HHH, 5, YY, ++, ///, \
(formerly Perl 6)
subgroup-chars ($str) {$str.comb: / (.)$0* / }# Testing:forQ[gHHH5YY++///\],Q[fffn⃗n⃗n⃗»»» ℵℵ☄☄☃☃̂☃🤔🇺🇸🤦♂️👨👩👧👦] ->$string {put'Original: ',$string;put' Split: ',group-chars($string).join(', ');}
Original: gHHH5YY++///\ Split: g, HHH, 5, YY, ++, ///, \Original: fffn⃗n⃗n⃗»»» ℵℵ☄☄☃☃̂☃🤔🇺🇸🤦♂️👨👩👧👦 Split: fff, , n⃗n⃗n⃗, »»», , ℵℵ, ☄☄, ☃, ☃̂, ☃, 🤔, 🇺🇸, 🤦♂️, 👨👩👧👦
The second test-case is to show that Raku works with strings on the Unicode grapheme level, handles whitespace, combiners, and zero width characters up to Unicode Version 13.0 correctly. (Raku generally tracks updates to the Unicode spec and typically lags no more than a month behind.) For those of you with browsers unable to display the second string, it consists of:
$ENTRY Go { = <Prout <Join (', ') <Split 'gHHH5YY++///\\'>>>;};Split { (e.Cur) = (e.Cur); (e.Cur s.1) s.1 e.X = <Split (e.Cur s.1 s.1) e.X>; (e.Cur) s.1 e.X = (e.Cur) <Split (s.1) e.X>; s.1 e.X = <Split (s.1) e.X>;};Join { (e.Joiner) = ; (e.Joiner) (e.Str) = e.Str; (e.Joiner) (e.Str) e.Strs = e.Str e.Joiner <Join (e.Joiner) e.Strs>;};g, HHH, 5, YY, ++, ///, \
/*REXX program splits a string based on change of character ───► a comma delimited list.*/parseargstr/*obtain optional arguments from the CL*/ifstr==''thenstr='gHHH5YY++///\'/*Not specified? Then use the default.*/p=left(str,1)/*placeholder for the "previous" string*/$=/* " " " output " */doj=1forlength(str);@=substr(str,j,1)/*obtain a character from the string. */if@\==pthen$=$', '/*Not replicated char? Append delimiter*/p=@;$=$||@/*append a character to the $ string.*/end/*j*//* [↓] keep peeling chars until done. */say' input string: 'str/*display the original string & output.*/say' output string: '$/*stick a fork in it, we're all done. */
input string: gHHH5YY++///\ output string: g, HHH, 5, YY, ++, ///, \
/* REXX */Parseargstr/*obtain optional arguments from the CL*/ifstr==''thenstr='gHHH5YY++///\'/*Not specified? Then use the default.*/input=strx=''cp=''result=''DoWhilestr<>''ParseVarstrc+1strIfc==cpThenx=x||cElseDoIfx>>''Thenresult=result||x', 'x=cEndcp=cEndresult=result||xsay' input string: 'inputsay' output string: 'result
Output:
input string: gHHH5YY++///\ output string: g, HHH, 5, YY, ++, ///, \
see split("gHHH5YY++///\")func split(s ) c =left (s, 1) split = "" for i = 1 to len(s) d = substr(s, i, 1) if d != c split = split + ", " c = d ok split = split + d next return splitOutput:
g, HHH, 5, YY, ++, ///, \
≪ → text ≪ "" text 1 1 SUB 1 text SIZEFOR j text j DUP SUBIF DUP2 ≠THEN SWAP DROP ", " OVER +END ROT SWAP + SWAPNEXTDROP≫ ≫ ‘COMASPLT’ STOdefsplit(str)puts" input string:#{str}"s=str.chars.chunk(&:itself).map{|_,a|a.join}.join(", ")puts"output string:#{s}"sendsplit("gHHH5YY++///\\")
input string: gHHH5YY++///\output string: g, HHH, 5, YY, ++, ///, \
/// Extension trait adding `split_duplicates` to string slices.
trait RosettaExt {
    /// Returns an iterator over the runs of identical characters.
    fn split_duplicates(&self) -> SplitDuplicates;
}

impl RosettaExt for &str {
    fn split_duplicates(&self) -> SplitDuplicates {
        SplitDuplicates { string: self }
    }
}

/// Iterator yielding consecutive runs of identical characters as sub-slices
/// of the original string. `string` holds the part not yet yielded.
struct SplitDuplicates<'a> {
    string: &'a str,
}

impl<'a> Iterator for SplitDuplicates<'a> {
    type Item = &'a str;

    fn next(&mut self) -> Option<Self::Item> {
        // An exhausted slice has no first character: iteration ends.
        let first = self.string.chars().next()?;

        // Byte offset of the first character that differs from `first`;
        // when every remaining character matches, the run is the whole rest.
        // char_indices keeps the cut on a character boundary.
        let cut = self
            .string
            .char_indices()
            .find(|&(_, c)| c != first)
            .map_or(self.string.len(), |(idx, _)| idx);

        let (run, rest) = self.string.split_at(cut);
        self.string = rest;
        Some(run)
    }
}

/// Joins the runs of identical characters in `s` with ", ".
fn splitter(s: &str) -> String {
    let runs: Vec<&str> = s.split_duplicates().collect();
    runs.join(", ")
}

fn main() {
    for test_string in ["g", "", "gHHH5YY++///\\"].iter() {
        println!("input string: {}", test_string);
        println!("output string: {}", splitter(test_string));
    }
}
input string: goutput string: ginput string: output string: input string: gHHH5YY++///\output string: g, HHH, 5, YY, ++, ///, \
[dependencies]itertools={version="0.14",features=["use_alloc"]}
use itertools::Itertools;fn split_text(s: &str) -> String { let mut r = Vec::new(); for (_, group) in &s.chars().chunk_by(|e| *e) { r.push(group.collect::<String>()) } r.join(", ")}fn main() { println!("output string: {}", split_text("gHHH5YY++///\\"));}[dependencies]itertools = { version = "0.14", default-features = false }use itertools::Itertools;fn main() { print!("output string: "); for i in "gHHH5YY++///\\".split_duplicates().intersperse(", ") { print!("{i}"); } println!();}Iterator::intersperse#![feature(iter_intersperse)]fn main() { print!("output string: "); for i in "gHHH5YY++///\\".split_duplicates().intersperse(", ") { print!("{i}"); } println!();}The soon-to-be-stabilizedintersperse is based off ofItertools, so if you can't pay the cost of usingcollect::<Vec<_>> in the first snippet, you can use either of the above. However, having bothitertools::Itertools and#![feature(iter_intersperse)] in the same scope will throw a compiler error because it doesn't know which trait method to call. To side-track this, you can use each traits' associated functions.
#![feature(iter_intersperse)]use itertools::Itertools;Iterator::intersperse(r#"gHHH5YY++///\"#.split_duplicates(), ", ");Itertools::intersperse(r#"gHHH5YY++///\"#.split_duplicates(), ", ");
// Split a (character) string into comma (plus a blank) delimited strings
// based on a change of character (left to right).
// See https://rosettacode.org/wiki/Split_a_character_string_based_on_change_of_character#Scala
//
// Returns the runs of identical characters in `s`, in order, joined by ", ".
// An empty `s` yields an empty string.
//
// Each character is compared with its predecessor. No sentinel character is
// appended, because a sentinel would add a spurious trailing ", " whenever
// the input does not already end with the sentinel, and would be returned
// itself for empty input.
def runLengthSplit(s: String): String =
  if (s.isEmpty) ""
  else
    s.head.toString + s.zip(s.tail).map { case (prev, cur) =>
      if (prev != cur) ", " + cur else cur.toString
    }.mkString

println(runLengthSplit("""gHHH5YY++///\"""))
// See it running in your browser by ScalaFiddle (JavaScript)
or by Scastie (JVM).
def runLengthSplit(s:String):List[String] = { def recursiveSplit(acc:List[String], rest:String): List[String] = rest match { case "" => acc case _ => { val (h, t) = rest.span(_ == rest.head) recursiveSplit(acc :+ h, t) } } recursiveSplit(Nil, s)}val result = runLengthSplit("""gHHH5YY++///\""")println(result.mkString(","))g,HHH,5,YY,++,///,\
echo 'gHHH5YY++///\' | sed 's/\(.\)\1*/&, /g;s/, $//'
Output:
g, HHH, 5, YY, ++, ///, \
program split_a_character_string_based_on_change_of_character; s := "gHHH5YY++///\\"; print(join_strings(", ", split_on_change(s))); proc split_on_change(s); parts := []; loop while s /= "" do parts with:= span(s, s(1)); end loop; return parts; end proc; proc join_strings(s, parts); if parts=[] then return ""; end if; return parts(1) +/ [s + part : part in parts(2..)]; end proc;end program;g, HHH, 5, YY, ++, ///, \
func group(str) { gather { while (var match = (str =~ /((.)\g{-1}*)/g)) { take(match[0]) } }}say group(ARGV[0] \\ 'gHHH5YY++///\\').join(', ')g, HHH, 5, YY, ++, ///, \
* Program: split_on_change_of_character.sbl* To run: sbl split_on_change_of_character.sbl* Description: Split a (character) string into comma (plus a blank)* delimited strings based on a change of character (left to right).** Blanks should be treated as any other character* (except they are problematic to display clearly).* The same applies to commas.** For instance, the string:** gHHH5YY++///\ * should be split and show:** g, HHH, 5, YY, ++, ///, \ * Comment: Tested using the Spitbol for Linux version of SNOBOL4lf = substr(&alphabet,11,1) ;* New line or line feed* Function split_cc will split a string on a change of character.define('split_cc(s)tchar,target,post'):(split_cc_end)split_cctchar = substr(s,1,1) :f(freturn)split_cc_pat = span(*tchar) . target (rpos(0) | len(1) . tchar rem) . postsplit_cc2s ? split_cc_pat = post :f(split_cc3)split_cc = (ident(split_cc) target, split_cc ', ' target) :s(split_cc2)split_cc3:(return)split_cc_endtest_string = "gHHH5YY++///\"output = test_string lfsplit_string = split_cc(test_string)output = split_stringENDgHHH5YY++///\g, HHH, 5, YY, ++, ///, \
(* * Head-Tail implementation of grouping *)fun group' ac nil = [ac] | group' nil (y::ys) = group' [y] ys | group' (x::ac) (y::ys) = if x=y then group' (y::x::ac) ys else (x::ac) :: group' [y] ysfun group xs = group' nil xsfun groupString str = String.concatWith ", " (map implode (group (explode str)))
- groupString "gHHH5YY++///\\";val it = "g, HHH, 5, YY, ++, ///, \\" : string
public extension String { func splitOnChanges() -> [String] { guard !isEmpty else { return [] } var res = [String]() var workingChar = first! var workingStr = "\(workingChar)" for char in dropFirst() { if char != workingChar { res.append(workingStr) workingStr = "\(char)" workingChar = char } else { workingStr += String(char) } } res.append(workingStr) return res }}print("gHHH5YY++///\\".splitOnChanges().joined(separator: ", "))g, HHH, 5, YY, ++, ///, \
composer splitEquals <reps> <nextReps>* rule reps: <'(.)\1*'> rule nextReps: <reps> -> \(', ' ! $ ! \)end splitEquals'gHHH5YY++///\' -> splitEquals -> !OUT::writeAnother option is to split the string into a glyph array and process that, recombining the output strings. Here in v0.5 syntax
splitEquals templates chars is [$...]; @ set [$chars(1)]; $chars(2..)... -> # ! '$@...;' ! when <|=$@(1)> do ..|@ set $; otherwise '$@...;' ! @ set [$];end splitEquals'gHHH5YY++///\' -> splitEquals !
g, HHH, 5, YY, ++, ///, \
SUB SPLITUNIQUE$(s$)DIM c$, d$, split$, i%c$ = LEFT$(s$, 1)split$ = ""FOR i% = 1 TO LEN(s$) d$ = MID$(s$, i%, 1) IF d$ <> c$ THENsplit$ = split$ + ", "c$ = d$ END IF split$ = split$ + d$NEXTRETURN split$END SUBPRINT SPLITUNIQUE$("gHHH5YY++///\")ENDThis is most concise with regular expressions. Note well the two steps: it could be achieved in one very clever regexp, but being that clever is usually a bad idea (for both readability and performance, in this case).
set string "gHHH5YY++///\\"regsub -all {(.)\1*} $string {\0, } stringregsub {, $} $string {} stringputs $stringg, HHH, 5, YY, ++, ///, \
The task doesn't state explicitly the order in which substrings should be displayed. So, here are two variants: one is order-preserving, the other is not order-preserving.
#lang transdMainModule: { s: "gHHH5YY++///\\", _start: (λ (with res "" (for c in (split s "") do (if (neq Char(c) (back res)) (+= res ", ")) (+= res c)) (textout res)) (lout "Second variant: ") (for v in (values (group-by (split s ""))) do (textout (if @idx ", ") (join v ""))) )}g, HHH, 5, YY, ++, ///, \Second variant:++, ///, 5, HHH, YY, \, g
&p/$"_, _"⊜□+1⊸⊛ "gHHH5YY++///\\"
g, HHH, 5, YY, ++, ///, \
Option ExplicitSub Split_string_based_on_change_character()Dim myArr() As String, T As StringConst STRINPUT As String = "gHHH5YY++///\"Const SEP As String = ", " myArr = Split_Special(STRINPUT) T = Join(myArr, SEP) Debug.Print Left(T, Len(T) - Len(SEP))End SubFunction Split_Special(Ch As String) As String()'return an array of StringsDim tb, i&, st As String, cpt As Long, R() As String tb = Split(StrConv(Ch, vbUnicode), Chr(0)) st = tb(LBound(tb)) ReDim R(cpt) R(cpt) = st For i = 1 To UBound(tb) If tb(i) = st Then R(cpt) = R(cpt) & st Else st = tb(i) cpt = cpt + 1 ReDim Preserve R(cpt) R(cpt) = st End If Next Split_Special = REnd Function
g, HHH, 5, YY, ++, ///, \
fn main() {
	println(splitter('gHHH5YY++///\\')) // The "\" character needs to be escaped.
}

// splitter returns `text` with ', ' inserted wherever a character differs
// from the character before it. An empty `text` yields an empty string.
fn splitter(text string) string {
	// substr(0, 1) is out of range for an empty string, so return early.
	if text.len == 0 {
		return ''
	}
	mut check := text.substr(0, 1)
	mut new_text, mut temp := '', ''
	for index, _ in text {
		temp = text.substr(index, index + 1)
		// A change of character starts a new group.
		if temp != check {
			new_text = new_text + ', '
			check = temp
		}
		new_text = new_text + temp
	}
	return new_text
}
// Output:
// g, HHH, 5, YY, ++, ///, \
// Splits s into runs of identical characters and returns the runs joined
// with ", ". An empty string is returned unchanged.
var split = Fn.new { |s|
    if (s.isEmpty) return ""
    var pieces = []
    var run = ""      // run currently being accumulated
    var prev = null   // previous character, null before the first one
    for (ch in s) {
        // A change of character closes the current run.
        if (prev != null && ch != prev) {
            pieces.add(run)
            run = ""
        }
        run = run + ch
        prev = ch
    }
    pieces.add(run)
    return pieces.join(", ")
}

var s = "gHHH5YY++///\\"
System.print(split.call(s))
g, HHH, 5, YY, ++, ///, \
;; delimit: return a copy of string s with ", " inserted at every point
;; where a character differs from the one before it.
(defun delimit (s)
    ;; delim walks the remaining characters (rest-chars) and grows the output
    ;; list (acc); prev is the character handled on the previous step.
    (defun delim (rest-chars acc prev)
        (if (null rest-chars)
            acc
            (delim (cdr rest-chars)
                   (append acc
                           (if (equal (car rest-chars) prev)
                               (list (car rest-chars))
                               (list #\, #\Space (car rest-chars))))
                   (car rest-chars))))
    ;; Seed prev with the first character so nothing precedes it.
    (let ((chars (string->list s)))
        (list->string (delim chars '() (car chars)))))

(display (delimit "gHHH5YY++///\\")) ;; NB. The "\" character needs to be escaped
g, HHH, 5, YY, ++, ///, \
string 0;               \use the zero-terminated string convention
char Ptr;               \address of the character currently being output
[Ptr:= "gHHH5YY++///\";
while Ptr(0) # 0 do
    [ChOut(0, Ptr(0));
    \Separate runs, but never write a separator after the final character
    if Ptr(1) # 0 then
        if Ptr(1) # Ptr(0) then Text(0, ", ");
    Ptr:= Ptr+1;
    ];
]
g, HHH, 5, YY, ++, ///, \
rem esplit$: returns instring$ with ", " inserted between every pair of
rem adjacent characters that differ.
sub esplit$(instring$)
    rem Strings shorter than two characters are returned unchanged.
    if len(instring$) < 2 return instring$
    ret$ = left$(instring$,1)
    for i = 2 to len(instring$)
        rem Compare each character with its left neighbour.
        if mid$(instring$,i,1) <> mid$(instring$, i - 1, 1) ret$ = ret$ + ", "
        ret$ = ret$ + mid$(instring$, i, 1)
    next i
    return ret$
end sub

print esplit$("gHHH5YY++///\\")

;Z80 version: prints StringA one character at a time through PrintChar,
;writing ", " after a character whenever the character that follows differs.
PrintChar equ &BB5A ;Amstrad CPC BIOS call
Terminator equ 0 ;marks the end of a string
	org &8000
	LD HL,StringA
loop:
	ld a,(HL)	;load a char from (HL)
	cp Terminator ;is it the terminator?
	ret z	;if so, exit
	ld e,a	;store this char in E temporarily
	inc hl	;next char
	ld a,(HL)	;get next char
	cp Terminator ;is the next char the terminator?
	jp z,StringDone ;if so, print E and exit.
	;needed to prevent the last char from getting a comma and space.
	dec hl	;go back one so we don't skip any chars
	cp e ;does (HL) == (HL+1)?
	push af	;keep the comparison result across the PrintChar call
	ld a,e
	call PrintChar	;either way, print E to screen.
	pop af	;retrieve the results of the last compare.
	jr z,SkipComma	;if A=E, no comma or space. Just loop again.
	ld a,','
	call PrintChar
	ld a,' '
	call PrintChar
SkipComma:
	inc hl	;next char
	jp loop	;back to start
StringDone:
	ld a,e	;last character in string is printed here.
	jp PrintChar
ReturnToBasic:
	RET
StringA:
	byte "gHHH5YY++///\",0
g, HHH, 5, YY, ++, ///, \
// group: returns str with ", " inserted wherever the character changes.
// The first character seeds the output sink, so str must not be empty.
fcn group(str){
   prev:=str[0];
   out:=Sink(prev);
   foreach ch in (str[1,*]){
      // A different character starts a new run: write the separator first.
      if(ch!=prev){ out.write(", "); prev=ch; }
      out.write(ch);
   }
   out.close();
}
group("gHHH5YY++///\\").println();
g, HHH, 5, YY, ++, ///, \
10 REM Build n$ from s$, adding ", " wherever neighbouring characters differ
20 LET s$="gHHH5YY++///\"
30 LET n$=s$(1)
40 FOR i=2 TO LEN s$
50 IF s$(i)<>s$(i-1) THEN LET n$=n$+", "
60 LET n$=n$+s$(i)
70 NEXT i
80 PRINT n$
g, HHH, 5, YY, ++, ///, \