Hi there,
Can anyone point out any really obvious flaws in the methodology below
to determine the likely encoding of a file, please? I know the number
of types of encoding is small, but that is only because the
possibilities I need to work with is a small list.
[color=blue]
> private string determineFileEn coding(FileStre am strm)
> {
> long originalSize = strm.Length;
> StreamReader rdr = new StreamReader(st rm);
>
> strm.Position = 0;
> System.Text.UTF 8Encoding unic = new System.Text.UTF 8Encoding();
> byte[] inputFile = unic.GetBytes(r dr.ReadToEnd()) ;
> if(inputFile.Le ngth == originalSize)
> {
> return "UTF8";
> }
>
> strm.Position = 0;
> System.Text.Uni codeEncoding unic2 = new System.Text.Uni codeEncoding();
> byte[] inputFile2 = unic2.GetBytes( rdr.ReadToEnd() );
> if(inputFile2.L ength == originalSize)
> {
> return "Unicode";
> }
>
> strm.Position = 0;
> System.Text.UTF 7Encoding unic3 = new System.Text.UTF 7Encoding();
> byte[] inputFile3 = unic3.GetBytes( rdr.ReadToEnd() );
> if(inputFile3.L ength == originalSize)
> {
> return "UTF7";
> }
>
> System.Text.ASC IIEncoding unic4 = new System.Text.ASC IIEncoding();
> byte[] inputFile4 = unic3.GetBytes( rdr.ReadToEnd() );
> if(inputFile4.L ength == originalSize)
> {
> return "Ascii";
> }
>
> return "Not known";
> }[/color]
Thanks in advance
Marc.
Can anyone point out any really obvious flaws in the methodology below
to determine the likely encoding of a file, please? I know the number
of types of encoding is small, but that is only because the
possibilities I need to work with is a small list.
[color=blue]
> private string determineFileEn coding(FileStre am strm)
> {
> long originalSize = strm.Length;
> StreamReader rdr = new StreamReader(st rm);
>
> strm.Position = 0;
> System.Text.UTF 8Encoding unic = new System.Text.UTF 8Encoding();
> byte[] inputFile = unic.GetBytes(r dr.ReadToEnd()) ;
> if(inputFile.Le ngth == originalSize)
> {
> return "UTF8";
> }
>
> strm.Position = 0;
> System.Text.Uni codeEncoding unic2 = new System.Text.Uni codeEncoding();
> byte[] inputFile2 = unic2.GetBytes( rdr.ReadToEnd() );
> if(inputFile2.L ength == originalSize)
> {
> return "Unicode";
> }
>
> strm.Position = 0;
> System.Text.UTF 7Encoding unic3 = new System.Text.UTF 7Encoding();
> byte[] inputFile3 = unic3.GetBytes( rdr.ReadToEnd() );
> if(inputFile3.L ength == originalSize)
> {
> return "UTF7";
> }
>
> System.Text.ASC IIEncoding unic4 = new System.Text.ASC IIEncoding();
> byte[] inputFile4 = unic3.GetBytes( rdr.ReadToEnd() );
> if(inputFile4.L ength == originalSize)
> {
> return "Ascii";
> }
>
> return "Not known";
> }[/color]
Thanks in advance
Marc.
Comment