// Snippet: automatic detection of a file's encoding when reading it.
// (Recovered from patch a29b3ed9, "Create Автоматическое определение кодировки при чтении файла.cs".)

// Shared code
namespace Analizing
{
    public static class EncodingTest
    {
        // Number of leading bytes inspected; files shorter than this are read in full.
        private const int ProbeSize = 1000;

        // Share of 0xD0 bytes (in percent) above which the probe is assumed to be UTF-8.
        private const double Utf8LeadBytePercent = 5.0;

        /// <summary>
        /// Guesses the text encoding of a file from its first bytes.
        /// A byte order mark (BOM) is looked for first:
        ///   UTF8     : EF BB BF
        ///   UTF16 BE : FE FF
        ///   UTF16 LE : FF FE
        ///   UTF32 BE : 00 00 FE FF
        ///   UTF32 LE : FF FE 00 00
        /// When no BOM is present, the file is assumed to be UTF-8 if more than
        /// 5% of the probed bytes are 0xD0; otherwise <see cref="Encoding.Default"/>
        /// is returned.
        /// </summary>
        /// <param name="path">Path of the file to inspect. The file is opened read-only and is never created.</param>
        /// <returns>The detected encoding, or <see cref="Encoding.Default"/> when nothing matched.</returns>
        /// <exception cref="IOException">
        /// The file does not exist, cannot be opened, or the opened stream is not seekable/readable.
        /// </exception>
        public static Encoding DetectEncoding(string path)
        {
            // Open strictly for reading: a detector must not create the file or require write access.
            using (FileStream stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                if (!stream.CanSeek || !stream.CanRead)
                {
                    throw new IOException("DetectEncoding() файл не может быть прочитан");
                }

                int probeLength = (int)Math.Min((long)ProbeSize, stream.Length);
                byte[] probe = new byte[probeLength];

                // Stream.Read may return fewer bytes than requested, so keep reading
                // until the probe buffer is full or the end of the file is reached.
                int bytesRead = 0;
                while (bytesRead < probeLength)
                {
                    int chunk = stream.Read(probe, bytesRead, probeLength - bytesRead);
                    if (chunk == 0)
                    {
                        break;
                    }
                    bytesRead += chunk;
                }

                // Fewer than two bytes cannot hold any BOM and are too few to guess from.
                if (bytesRead < 2)
                {
                    return Encoding.Default;
                }

                // An explicit BOM is authoritative; the statistical guess must not override it.
                Encoding fromBom = DetectByteOrderMark(probe, bytesRead);
                if (fromBom != null)
                {
                    return fromBom;
                }

                return LooksLikeUtf8(probe, bytesRead) ? Encoding.UTF8 : Encoding.Default;
            }
        }

        // Returns the encoding announced by a BOM at the start of the buffer, or null when there is none.
        private static Encoding DetectByteOrderMark(byte[] buffer, int count)
        {
            if (count >= 4 && buffer[0] == 0x00 && buffer[1] == 0x00 && buffer[2] == 0xFE && buffer[3] == 0xFF)
            {
                return new UTF32Encoding(true, true);       // UTF-32 big endian
            }
            if (count >= 3 && buffer[0] == 0xEF && buffer[1] == 0xBB && buffer[2] == 0xBF)
            {
                return Encoding.UTF8;
            }
            if (count >= 2 && buffer[0] == 0xFE && buffer[1] == 0xFF)
            {
                return new UnicodeEncoding(true, true);     // UTF-16 big endian
            }
            if (count >= 2 && buffer[0] == 0xFF && buffer[1] == 0xFE)
            {
                // FF FE 00 00 is the UTF-32 LE mark; it must be tested before plain UTF-16 LE,
                // whose mark (FF FE) is a prefix of it.
                if (count >= 4 && buffer[2] == 0x00 && buffer[3] == 0x00)
                {
                    return new UTF32Encoding(false, true);  // UTF-32 little endian
                }
                return new UnicodeEncoding(false, true);    // UTF-16 little endian
            }
            return null;
        }

        // Heuristic for BOM-less files: 0xD0 is the UTF-8 lead byte of U+0400..U+043F
        // (which includes most Cyrillic letters), so a high share of it suggests UTF-8 text.
        // NOTE(review): the counting loop was reconstructed from a damaged source listing;
        // confirm the exact formula against the original repository.
        private static bool LooksLikeUtf8(byte[] buffer, int count)
        {
            int leadBytes = 0;
            for (int i = 0; i < count; i++)
            {
                if (buffer[i] == 0xD0)
                {
                    leadBytes++;
                }
            }

            double percent = leadBytes * 100.0 / count;
            return percent > Utf8LeadBytePercent;
        }
    }
}

// In the action block ("cube")

string path = project.Directory + @"\" + project.Variables["input"].Value; // path to the file
Encoding ecn1 = Analizing.EncodingTest.DetectEncoding(path); // try to detect the encoding
project.SendInfoToLog("Определили кодировку как : " + ecn1.ToString()); // report the detected encoding to the PM log

var t = File.ReadAllText(path, ecn1); // read the whole file
project.Variables["res"].Value = t; // store the text in the variable