hi deadelvis…
i’ve moved the imap_pop class into a better organized set of libs, so these mime decode functions are in the main class - you might have to change the method syntax to get it to work in your setup.
anyway, this is what i use. i found some of this code on php.net and then adapted and expanded it until it worked for a whole range of email strings. basically, i send everything through this. the main thing is that it just goes out into “HTML entity land” to achieve the goal of full UTF-8 decoding coverage. since you are using up-to-date PHP5, some of the problems addressed by this custom decode_mime code might already be fixed. but these work for me.
cheers.
/**
* Decode a MIME encoded header string and return
* the decoded string.
* Multiple decoded segments are simply concatenated.
*
* these are generally workarounds for the limitations
* of the various encoding functions;
* some of these problems are fixed in PHP5
*
* the standard imap_mime_header_decode() function
* doesn't decode UTF-8 properly, so we have to
* convert to HTML entities and then back again
* to make it handle both.
*
* send it a MIME encoded header string
*/
function decode_mime($raw_header)
{
    // Decode a MIME encoded header string into one plain string.
    //
    // $raw_header  the raw (possibly RFC 2047 encoded) header value
    // returns      the decoded text; '' when the header cannot be parsed
    //
    // UTF-8 text is pushed through htmlentities() and then mapped back
    // with the expanded entity table (_html_entity_decode_expanded()).

    $parts = imap_mime_header_decode($raw_header);
    if (!is_array($parts))
    {
        // imap_mime_header_decode() failed; there is nothing to iterate
        return '';
    }

    // Collect the segment texts, merging neighbouring segments that declare
    // the same charset. Merging matters because some mailers split one
    // multi-byte character across two adjacent encoded-words.
    $runs = array();
    $prev_charset = null;
    foreach ($parts as $item)
    {
        $charset = strtolower($item->charset);

        // an empty / whitespace-only 'default' segment is just the
        // separator between encoded-words: don't include it
        if ($charset === 'default' && trim($item->text) == '')
        {
            continue;
        }

        if ($charset === $prev_charset)
        {
            $runs[count($runs) - 1] .= $item->text;
        }
        else
        {
            $runs[] = $item->text;
            $prev_charset = $charset;
        }
    }

    $decoded_text = '';
    foreach ($runs as $run)
    {
        $t = htmlentities($run, ENT_QUOTES, 'UTF-8');
        if ($t === '' && $run !== '')
        {
            // htmlentities() returns '' when the input is not valid UTF-8
            // (e.g. an ISO-8859-1 encoded-word). Keep those bytes as they
            // are instead of silently dropping the text; only this run is
            // affected, the rest of the header is still decoded.
            $decoded_text .= $run;
            continue;
        }
        $decoded_text .= $this->_html_entity_decode_expanded($t);
    }

    return $decoded_text;
}
/**
* the standard html_entity_decode() function
* doesn't decode enough chars, so use the expanded
* translation table for entities
*/
function _html_entity_decode_expanded($str)
{
    // Replace every entity listed in $this->html_entity_strings with the
    // character at the same index in $this->html_entity_chrs.
    //
    // $str     entity-encoded text
    // returns  the text with all known entities decoded

    // tables not built (yet): nothing to translate, hand the text back as is
    if (empty($this->html_entity_strings) || empty($this->html_entity_chrs))
    {
        return $str;
    }

    // strtr() with a map works in a single pass and never rescans text it
    // has already replaced. str_replace() applies the pairs one after the
    // other, so '&amp;lt;' became '&lt;' and then '<' (decoded twice).
    $map = array_combine($this->html_entity_strings, $this->html_entity_chrs);
    if (empty($map))
    {
        return $str;
    }

    return strtr($str, $map);
}
/**
* build the lookup tables that map HTML encoded
* entities back to single-byte character codes
* (stored in $this->html_entity_strings and
* $this->html_entity_chrs)
* for optimization, build this just once
* when creating this class
* this is used by decode_mime() for custom decoding
*
* the standard html_entity_decode() function
* doesn't decode enough chars, so we expand the
* translation table for entities and then do a
* string replace, which mimics the decode but
* covers a wider range of entities than the standard PHP code:
* get the expanded translation table,
* then do a string replace to change the values
*/
function _build_custom_html_translation_table()
{
    // Build the two parallel lookup arrays used for entity decoding:
    //   $this->html_entity_strings  the entities ('&eacute;', '&hellip;', ...)
    //   $this->html_entity_chrs     the single-byte character for each entity
    // Takes no arguments and returns nothing.

    // from php.net comments for get_html_translation_table() function
    // It adds to the standard get_html_translation_table the codes of
    // the characters usually M$ Word replaces into typed text.
    // Otherwise those characters would never be displayed correctly
    // in html output
    // this also lets us take care of numerically encoded entities
    // by hand like &#039; for single-quote which seem to be left out

    // The charset is given explicitly: since PHP 5.4 the default is UTF-8,
    // which would key the table by multi-byte strings, while everything
    // below (and the decoding that uses it) works with single bytes.
    $trans = get_html_translation_table(HTML_ENTITIES, ENT_QUOTES, 'ISO-8859-1');

    // byte value => entity. The entity names have to be spelled out as
    // entities here; a literal glyph (or an empty string) would never match
    // the output of htmlentities().
    $extra = array(
        39  => '&#039;',   // Single Quotation Mark (numerical entity)
        130 => '&sbquo;',  // Single Low-9 Quotation Mark
        131 => '&fnof;',   // Latin Small Letter F With Hook
        132 => '&bdquo;',  // Double Low-9 Quotation Mark
        133 => '&hellip;', // Horizontal Ellipsis
        134 => '&dagger;', // Dagger
        135 => '&Dagger;', // Double Dagger
        136 => '&circ;',   // Modifier Letter Circumflex Accent
        137 => '&permil;', // Per Mille Sign
        138 => '&Scaron;', // Latin Capital Letter S With Caron
        139 => '&lsaquo;', // Single Left-Pointing Angle Quotation Mark
        140 => '&OElig;',  // Latin Capital Ligature OE
        145 => '&lsquo;',  // Left Single Quotation Mark
        146 => '&rsquo;',  // Right Single Quotation Mark
        147 => '&ldquo;',  // Left Double Quotation Mark
        148 => '&rdquo;',  // Right Double Quotation Mark
        149 => '&bull;',   // Bullet
        150 => '&ndash;',  // En Dash
        151 => '&mdash;',  // Em Dash
        152 => '&tilde;',  // Small Tilde
        153 => '&trade;',  // Trade Mark Sign
        154 => '&scaron;', // Latin Small Letter S With Caron
        155 => '&rsaquo;', // Single Right-Pointing Angle Quotation Mark
        156 => '&oelig;',  // Latin Small Ligature OE
        159 => '&Yuml;'    // Latin Capital Letter Y With Diaeresis
    );
    foreach ($extra as $code => $entity)
    {
        $trans[chr($code)] = $entity;
    }

    ksort($trans);

    // values and keys come from the same array, so index N of one list
    // always belongs to index N of the other
    $this->html_entity_strings = array_values($trans);
    $this->html_entity_chrs = array_keys($trans);
}