view string.rhope @ 77:a748300a4143

Some untested progress on String
author Mike Pavone <pavone@retrodev.com>
date Thu, 08 Jul 2010 09:57:16 -0400
parents f7bcf3db1342
children 4d5ea487f810
line wrap: on
line source


//Consumes the continuation bytes of one multi-byte UTF-8 sequence, then hands
//control back to Count UTF8 for the rest of the array.
//  num      - continuation bytes still expected
//  arr      - byte array being scanned
//  index    - position of the byte to examine
//  count    - characters counted before this sequence started
//  consumed - bytes of this sequence already read
//  out      - character count once the end of the array has been reached
UTF8 Expect[num,arr,index,count,consumed:out]
{
	cur <- [arr]Index[index]
	{
		If[[cur]<[128u8]]
		{
			//Error: ASCII byte when we were expecting part of a multibyte sequence
			//every byte read so far, plus this one, counts as its own character
			total <- [1i32]+[[count]+[consumed]]
		}{
			If[[cur]<[192u8]]
			{
				If[[num]=[1]]
				{
					//Last expected continuation byte: the whole sequence is one character
					total <- [1i32]+[count]
				}{
					//More continuation bytes to go
					out <- UTF8 Expect[[num]-[1], arr, [index]+[1], count, [1i32]+[consumed]]
				}
			}{
				//Error: too high to be a continuation byte
				//counted the same way as the ASCII case above
				total <- [1i32]+[[count]+[consumed]]
			}
		}
	}{
		//Error: string ended in the middle of a multi-byte sequence
		//each byte read so far counts as a character
		out <- [count]+[consumed]
	}
	//Only reached when one of the branches above produced a new total
	Val[total]
	{
		[arr]Next[index]
		{
			out <- Count UTF8[arr, ~, total]
		}{
			out <- Val[total]
		}
	}
}

//Counts the characters in a UTF-8 byte array, starting at index.
//  arr   - byte array being scanned
//  index - position of the byte to examine
//  count - characters counted so far
//  out   - character count once the end of the array has been reached
//Malformed input is tolerated: stray continuation bytes and bytes above 244
//are each counted as one character.
Count UTF8[arr,index,count:out]
{
	byte <- [arr]Index[index]
	{
		If[[128u8]>[byte]]
		{ ncount <- [1i32]+[count] }
		{
			If[[192u8]>[byte]]
			{
				//Error: Encoding for 2nd,3rd or 4th byte of sequence
				//treat as a single character
				ncount <- [1i32]+[count]
			}{
				If[[224u8]>[byte]]
				{
					//Lead byte of a 2 byte sequence
					out <- UTF8 Expect[1, arr, [index]+[1], count, 1]
				}{
					If[[240u8]>[byte]]
					{
						//Lead byte of a 3 byte sequence
						out <- UTF8 Expect[2, arr, [index]+[1], count, 1]
					}{
						If[[245u8]>[byte]]
						{
							//Lead byte of a 4 byte sequence
							out <- UTF8 Expect[3, arr, [index]+[1], count, 1]
						}{
							//Error: Out of range of Unicode standard
							//treat as a single character
							ncount <- [1i32]+[count]
						}
					}
				}
			}
		}
	}{
		//No byte at this index (for example an empty array): without this branch
		//out would never be populated, so report what has been counted so far
		out <- count
	}
	//Only continue scanning when this byte was counted here; the multi-byte
	//cases are finished by UTF8 Expect
	Val[ncount]
	{
		[arr]Next[index]
		{
			out <- Count UTF8[arr, ~, ncount]
		}{
			out <- Val[ncount]
		}
	}
}

//Flat string object.
//  Buffer - the byte array the string was built from (see String@Array)
//  Length - character count produced by Count UTF8 when the string is built
Blueprint String
{
	Buffer
	Length(Int32,Naked)
}

//Builds a String around a byte array, storing the array as the buffer and
//the result of Count UTF8 over it as the length.
String@Array[in:out(String)]
{
	chars <- Count UTF8[in, 0, 0]
	withbuf <- [Build[String()]]Buffer <<[in]
	out <- [withbuf]Length <<[chars]
}

//Writes the string's buffer to descriptor 1, then writes a single byte 10
//(line feed). out is the result of the second write.
Print@String[string:out]
{
	//TODO: Sanitize string (remove terminal escapes and replace invalid UTF)
	buf <- [string]Buffer >>
	nbytes <- Int64[[buf]Length >>]
	write[1i32, buf, nbytes]
	{
		newline <- [Array[]]Append[10u8]
		out <- write[1i32, newline, 1i64]
	}
}

//Reads one byte from descriptor 0 into a one element array and wraps the
//value delivered on read's second output in a String.
//NOTE(review): presumably that second output is the filled buffer - confirm
//against the definition of read.
Get Char[:out]
{
	onebyte <- [Array[]]Set[0, 0u8]
	read[0, onebyte, 1i64]
	{}
	{ out <- String[~] }
}

//Accumulates decimal digits from array, starting at index.
//  current - value accumulated so far
//  index   - position of the next byte to examine
//  array   - byte array holding the text
//  out     - accumulated value once a non-digit byte or the end is reached
_String to Int32[current,index,array:out]
{
	ch <- [array]Index[index]
	{
		If[[ch]<[48u8]]
		{
			//Below '0': stop
			out <- Val[current]
		}{
			If[[ch]>[57u8]]
			{
				//Above '9': stop
				out <- Val[current]
			}{
				digit <- Int32[[ch]-[48u8]]
				shifted <- [current]*[10i32]
				out <- _String to Int32[[shifted]+[digit], [index]+[1], array]
			}
		}
	}{
		//Ran off the end of the array
		out <- Val[current]
	}
}

//Parses the leading decimal digits of a string as an Int32. A first byte of
//45 ('-') negates the result; an empty buffer yields 0.
Int32@String[string:out]
{
	bytes <- [string]Buffer >>
	first <- [bytes]Index[0]
	{
		If[[first]=[45u8]]
		{
			//Skip the sign and negate what follows
			magnitude <- _String to Int32[0i32, 1, bytes]
			out <- [0i32]-[magnitude]
		}{
			out <- _String to Int32[0i32, 0, bytes]
		}
	}{
		//No first byte at all
		out <- 0i32
	}
}

//Flatten for a plain String: returns the string unchanged.
//NOTE(review): an identical Flatten@String definition appears again further
//down in this file; one of the two is redundant.
Flatten@String[string:out]
{
	out <- string
}

//Walks the continuation bytes of one multi-byte UTF-8 sequence.
//  buff     - byte array being scanned
//  cur      - byte index of the next expected continuation byte
//  expected - continuation bytes still expected
//  outcur   - byte index just past the bytes consumed
//  outboff  - number of bytes consumed
_CPOff to BOff[buff,cur,expected:outcur,outboff]
{
	//Explicit comparison: the base case is "nothing left to expect"
	If[[expected]=[0i32]]
	{
		outcur <- cur
		outboff <- 0i32
	}{
		byte <- [buff]Index[cur]
		{
			//Continuation bytes are 128 through 191; anything else is an error
			err <- If[[byte]>[191u8]] {}
			{
				err <- If[[byte]<[128u8]] {}
				{
					outcur <- _CPOff to BOff[buff, [cur]+[1i32], [expected]-[1i32]] {}
					{ outboff <- [~]+[1i32] }
				}
			}
		}{
			//Buffer ended in the middle of the sequence: nothing more to consume
			outcur <- cur
			outboff <- 0i32
		}

		Val[err]
		{
			//Not a continuation byte: consume it and stop
			outcur <- [cur]+[1i32]
			outboff <- 1i32
		}
	}
}

//Converts a code point offset into a byte offset.
//  buff  - byte array holding the UTF-8 data
//  cur   - code point index reached so far
//  boff  - byte offset that corresponds to cur
//  cpoff - code point offset to locate
//  out   - byte offset of code point cpoff
//If cpoff lies past the end of the buffer the byte offset reached so far is
//returned.
//NOTE(review): malformed sequences are not counted exactly the way Count UTF8
//counts them, so Length and offsets can disagree on invalid input.
CPOff to BOff[buff,cur,boff,cpoff:out]
{
	If[[cur]=[cpoff]]
	{
		out <- boff
	}{
		//Bytes are addressed by byte offset, not by code point index
		byte <- [buff]Index[boff]
		{
			If[[byte] < [128u8]]
			{
				nboff <- [boff]+[1i32]
			}{
				If[[byte]<[192u8]]
				{
					//Error: Encoding for 2nd,3rd or 4th byte of sequence
					//treat as a single character
					nboff <- [boff]+[1i32]
				}{
					If[[byte]<[224u8]]
					{
						expect <- 1i32
					}{
						If[[byte]<[240u8]]
						{
							expect <- 2i32
						}{
							If[[byte]<[245u8]]
							{
								expect <- 3i32
							}{
								//Error: Out of range of Unicode standard
								//treat as a single character
								nboff <- [boff]+[1i32]
							}
						}
					}
					Val[expect]
					{
						//Skip the continuation bytes; the helper's first output is
						//the byte index just past them
						nboff <- _CPOff to BOff[buff, [boff]+[1i32], expect]
					}
				}
			}
			//One more code point handled; arguments follow the declared order
			out <- CPOff to BOff[buff, [cur]+[1i32], nboff, cpoff]
		}{
			//Ran out of bytes before reaching cpoff
			out <- boff
		}
	}
}

//Splits a string at a code point offset into two String Slice views.
//  left  - the first slicepoint characters
//  right - everything from slicepoint onwards
Slice@String[string,slicepoint:left,right]
{
	//TODO: Handle invalid slicepoints
	buf <- [string]Buffer >>
	byteoff <- CPOff to BOff[buf, 0i32, 0i32, slicepoint]
	left <- String Slice[string, 0i32, slicepoint, byteoff]
	//The right half takes whatever characters and bytes remain
	rightchars <- [[string]Length >>]-[slicepoint]
	rightbytes <- [[buf]Length]-[byteoff]
	right <- String Slice[string, byteoff, rightchars, rightbytes]
}

//Looks up the byte at index in the string's buffer; both outputs are passed
//straight through from the buffer's Index.
Byte@String[string,index:out,invalid]
{
	buf <- [string]Buffer >>
	out,invalid <- [buf]Index[index]
}

//Returns the value stored in the string's Length field.
Length@String[string:out]
{
	out <- [string]Length >>
}

//Compares two strings byte by byte starting at index.
//  out - Yes when both strings hold the same bytes from index onwards and
//        end at the same place, No otherwise
//Both strings are checked for exhaustion, so the answer does not depend on
//the caller having compared lengths first.
_=String[left,right,index:out]
{
	lbyte <- [left]Byte[index]
	{
		rbyte <- [right]Byte[index]
		{
			,out <- If[[lbyte]=[rbyte]]
			{
				out <- _=String[left,right,[index]+[1]]
			}
		}{
			//right ran out of bytes before left did
			out <- No
		}
	}{
		//left is exhausted: equal only if right is exhausted as well
		[right]Byte[index]
		{ out <- No }
		{ out <- Yes }
	}
}

//Equality shared by the string types: strings whose Length differs are
//unequal, otherwise the bytes are compared from index 0.
Eq String[left,right:out]
{
	llen <- [left]Length
	rlen <- [right]Length
	//The false output of If becomes the result when the lengths differ
	,out <- If[[llen] = [rlen]]
	{
		out <- _=String[left,right,0]
	}
}

//Equality for String: delegates to Eq String.
=@String[left,right:out]
{
	out <- Eq String[left,right]	
}

//Returns the Length of the string's byte buffer.
Byte Length@String[string:out]
{
	buf <- [string]Buffer >>
	out <- [buf]Length
}

//Append for String: returns a String Cat built from left and right.
Append@String[left,right:out]
{
	out <- String Cat[left,right]
}

//View onto part of another string.
//  Source  - the string being viewed
//  Offset  - added to a byte index before it is looked up in Source
//  Length  - value supplied as length when the slice is built
//  ByteLen - number of bytes the slice covers; indexes at or past it are invalid
Blueprint String Slice
{
	Source
	Offset(Int32,Naked)	
	Length(Int32,Naked)
	ByteLen(Int32,Naked)
}

//Builds a String Slice, storing each argument in the field of the same name.
String Slice[source,offset,length,bytelen:out(String Slice)]
{
	blank <- Build[String Slice()]
	sourced <- [blank]Source <<[source]
	placed <- [sourced]Offset <<[offset]
	counted <- [placed]Length <<[length]
	out <- [counted]ByteLen <<[bytelen]
}

//Looks up a byte of the slice. Indexes at or past ByteLen are reported on
//invalid; otherwise the lookup is forwarded to Source with Offset added.
Byte@String Slice[string,index:out,invalid]
{
	limit <- [string]ByteLen >>
	,invalid <- If[[index]<[limit]]
	{
		base <- [string]Source >>
		shifted <- [index]+[[string]Offset >>]
		out,invalid <- [base]Byte[shifted]
	}
}

//Returns the value stored in the slice's ByteLen field.
Byte Length@String Slice[string:out]
{
	out <- [string]ByteLen >>
}

//Equality for String Slice: delegates to Eq String.
=@String Slice[left,right:out]
{
	out <- Eq String[left,right]
}

//Appends count bytes of the string's buffer, starting at offset, to dest.
//  out - dest with the bytes appended
_Flatten@String[string,dest,offset,count:out]
{
	If[count]
	{
		//Copy one byte, then recurse for the remainder
		next <- [[string]Buffer >>]Index[offset]
		grown <- [dest]Append[next]
		out <- [string]_Flatten[grown, [offset]+[1i32], [count]-[1i32]]
	}{
		out <- dest
	}
}

//Flatten for a plain String: returns the string unchanged.
//NOTE(review): this repeats the Flatten@String definition that appears
//earlier in this file; one of the two is redundant.
Flatten@String[string:out]
{
	out <- string
}

//Appends count bytes of the slice, starting at offset within the slice, to
//dest by forwarding to Source with the slice's Offset added.
_Flatten@String Slice[string,dest,offset,count:out]
{
	base <- [string]Source >>
	start <- [[string]Offset >>]+[offset]
	out <- [base]_Flatten[dest, start, count]
}

//Copies the bytes covered by the slice into a fresh array and builds a
//String from it.
Flatten@String Slice[string:out]
{
	bytes <- [[string]Source >>]_Flatten[Array[], [string]Offset >>, [string]ByteLen >>]
	out <- String[bytes]
}

//Append for String Slice: returns a String Cat built from left and right.
Append@String Slice[left,right:out]
{
	out <- String Cat[left,right]
}

//Concatenation of two strings that keeps both halves instead of copying.
//  Left, Right - the two strings that were joined
//  Length      - sum of the Length of both halves
//  ByteLen     - sum of the Byte Length of both halves
//NOTE(review): Length and ByteLen carry no (Int32,Naked) annotation here,
//unlike the same fields of String Slice - confirm whether that is intended.
Blueprint String Cat
{
	Left
	Right
	Length
	ByteLen
}

//Builds a String Cat from two strings, storing the sums of their Length and
//Byte Length alongside them.
String Cat[left,right:out]
{
	chars <- [[left]Length]+[[right]Length]
	bytes <- [[left]Byte Length]+[[right]Byte Length]
	joined <- [[Build[String Cat()]]Left <<[left]]Right <<[right]
	out <- [[joined]Length <<[chars]]ByteLen <<[bytes]
}

//Append for String Cat: returns a new String Cat built from left and right.
Append@String Cat[left,right:out]
{
	out <- String Cat[left,right]
}

//Looks up a byte of the concatenation. Indexes below the left half's byte
//length are served by Left; the rest are shifted down and served by Right.
//Indexes past the end of Right are reported on invalid.
Byte@String Cat[string,index:out,invalid]
{
	lhs <- [string]Left >>
	split <- [lhs]Byte Length
	If[[index]<[split]]
	{
		out,invalid <- [lhs]Byte[index]
	}{
		rhs <- [string]Right >>
		shifted <- [index]-[split]
		,invalid <- If[[shifted]<[[rhs]Byte Length]]
		{
			out,invalid <- [rhs]Byte[shifted]
		}
	}
}

//Returns the value stored in the concatenation's ByteLen field.
Byte Length@String Cat[string:out]
{
	out <- [string]ByteLen >>
}

//Appends count bytes of the concatenation, starting at byte offset, to dest.
//  out - dest with the bytes appended
//The requested range is split at the boundary between Left and Right and
//each part is forwarded to that half's own _Flatten.
_Flatten@String Cat[string,dest,offset,count:out]
{
	lhs <- [string]Left >>
	rhs <- [string]Right >>
	leftlen <- [lhs]Byte Length
	If[[offset]<[leftlen]]
	{
		//Bytes of Left available from offset onwards
		avail <- [leftlen]-[offset]
		If[[count]>[avail]]
		{
			//Range spans both halves: finish Left, then take the rest from
			//the start of Right
			partial <- [lhs]_Flatten[dest, offset, avail]
			out <- [rhs]_Flatten[partial, 0i32, [count]-[avail]]
		}{
			//Range lies entirely within Left
			out <- [lhs]_Flatten[dest, offset, count]
		}
	}{
		//Range lies entirely within Right
		out <- [rhs]_Flatten[dest, [offset]-[leftlen], count]
	}
}