Pluf Framework

Sign in or create your account | Project List | Help

Pluf Framework Commit Details

Date: 2009-05-07 09:51:44 (10 months 4 days ago)
Author: Loïc d'Anterroches
Commit: 8a7e834931d6c8660dc74120730247856bae5b10
Message: Added Russian encoding detection method.

Files: src/Pluf/Text/UTF8.php (1 diff)

Change Details

src/Pluf/Text/UTF8.php
115115        return true;
116116    }
117117
/**
 * Guess which single-byte Cyrillic charset a string is encoded in.
 *
 * This should be used when the mb_string encoding detection is
 * failing. For example:
 *
 * <pre>
 * $encoding = mb_detect_encoding($string, mb_detect_order(), true);
 * if ($encoding == false) {
 *     $encoding = Pluf_Text_UTF8::detect_cyr_charset($string);
 * }
 * </pre>
 *
 * The detection is a byte-frequency heuristic: every byte >= 128 adds
 * a weight to each candidate charset whose letter ranges contain it.
 * The ranges roughly correspond to lowercase letters (weight 3, as
 * they dominate normal text) and uppercase letters (weight 1). The
 * charset with the highest total wins.
 *
 * Ties are resolved in favour of the charset declared first in the
 * table (KOI8-R, Windows-1251, CP-866, ISO-8859-5, MacCyrillic).
 * Consequently a string without any byte >= 128 (including the empty
 * string) yields 'KOI8-R'; the function never reports "not Russian".
 *
 * @link http://forum.php.su/topic.php?forum=1&topic=1346
 *
 * @param string $str Raw bytes to inspect
 * @return string Possible Russian encoding, one of 'KOI8-R',
 *                'Windows-1251', 'CP-866', 'ISO-8859-5', 'MacCyrillic'
 */
public static function detect_cyr_charset($str)
{
    // charset => list of array(first byte, last byte, weight).
    // Declaration order doubles as the tie-break priority.
    $bands = array(
        // 0xDF (223) is the lowercase hard sign in KOI8-R, so the
        // lowercase band runs up to and including 223.
        'KOI8-R' => array(
            array(192, 223, 3),
            array(224, 255, 1),
        ),
        'Windows-1251' => array(
            array(224, 255, 3),
            array(192, 223, 1),
        ),
        'CP-866' => array(
            array(160, 175, 3),
            array(224, 241, 3),
            array(128, 159, 1),
        ),
        'ISO-8859-5' => array(
            array(208, 239, 3),
            array(176, 207, 1),
        ),
        'MacCyrillic' => array(
            array(222, 254, 3),
            array(128, 159, 1),
        ),
    );

    // Histogram of the bytes actually present: byte value => count.
    $histogram = count_chars((string) $str, 1);

    $best = null;
    $bestScore = -1;
    foreach ($bands as $charset => $ranges) {
        $score = 0;
        foreach ($histogram as $byte => $count) {
            if ($byte < 128) {
                // Plain ASCII carries no information about the charset.
                continue;
            }
            foreach ($ranges as $range) {
                if ($byte >= $range[0] && $byte <= $range[1]) {
                    $score += $range[2] * $count;
                }
            }
        }
        // Strict comparison keeps the first declared charset on a tie,
        // independent of the sort stability of the running PHP version.
        if ($score > $bestScore) {
            $bestScore = $score;
            $best = $charset;
        }
    }

    return $best;
}
118176
119177
120178    /**

Archive Download the corresponding diff file

Branches:
master