abc_myplugin-0.1.txt
// Plugin name is optional. If unset, it will be extracted from the current
// file name. Uncomment and edit this line to override:
$plugin['name'] = 'rvm_latin1_to_utf8';
$plugin['version'] = '0.5';
$plugin['author'] = 'Ruud van Melick';
$plugin['author_uri'] = 'http://vanmelick.com/';
$plugin['description'] = 'Alter pre-MySQL 4.1 tables to use UTF-8 charset';
// Plugin types:
// 0 = regular plugin; loaded on the public web side only
// 1 = admin plugin; loaded on both the public and admin side
// 2 = library; loaded only when include_plugin() or require_plugin() is called
$plugin['type'] = 1;
@include_once('zem_tpl.php');
if (0) {
?>
# --- BEGIN PLUGIN HELP ---
h1. latin1 to utf8
*WARNING: carefully read this documentation and BACKUP YOUR DATABASE before activating/using this plugin.*
Textpattern always uses 'utf8' to store data in the database, but depending on which MySQL version you used when installing Textpattern, the character set of the tables and columns differs:
# MySQL versions older than 4.1: the character set of the tables created by Textpattern is typically 'latin1'.
# MySQL versions 4.1 and higher: the tables are set to use the 'utf8' character set, which matches the actual character set of the data stored in those tables.
Having the tables/columns themselves use the same character set as the data stored in them has some advantages when performing searches or sorting in Textpattern, especially if you use characters outside US-ASCII.
If you upgrade your MySQL version from pre-4.1 to 4.1 or higher, the table/column character sets are not automatically converted to 'utf8', so they stay 'latin1' while the data stored in those tables is still 'utf8'.
Changing this is tricky, because if you simply change the character set, MySQL assumes that the stored data must also be converted from 'latin1' to 'utf8', which leads to garbage, because the data is already 'utf8'.
This plugin avoids the unwanted character set conversion of the stored data by taking the following steps for each Textpattern table:
# drop indexes
# convert all columns that contain text to use a 'binary' charset (instead of 'latin1')
# convert those columns yet again, this time to 'utf8' charset (instead of 'binary')
# add indexes
# optimize table
Afterwards an attempt is made to update the 'dbcharset' setting in the @textpattern/config.php@ file to 'utf8'. Should this fail due to lack of permissions, you will be asked to make this change yourself. Do not skip this step, otherwise you will experience irreparable data corruption so severe that you will cry like a baby (you made backups, right?), much to the amusement of onlookers.
How to use this plugin:
# *MAKE BACKUPS FIRST!* (not just the TXP tables, but the entire database)
# Make sure you have MySQL version 4.1 or higher installed.
# Activate the plugin.
# "Click here":?event=rvm_latin1_to_utf8 and follow the instructions on the screen.
# --- END PLUGIN HELP ---
';
if (version_compare(mysqli_get_server_info($DB->link), '4.1.0', '<'))
{
echo graf('The rvm_latin1_to_utf8 plugin has been removed, because your MySQL version is lower than 4.1.');
safe_delete('txp_plugin', "name = 'rvm_latin1_to_utf8'");
rvm_latin1_to_utf8_end();
}
# standard TXP tables
$txptables = array(
'textpattern',
'txp_category',
'txp_css',
'txp_discuss',
'txp_discuss_ipban',
'txp_discuss_nonce',
'txp_file',
'txp_form',
'txp_image',
'txp_lang',
'txp_link',
'txp_log',
'txp_page',
'txp_plugin',
'txp_prefs',
'txp_priv',
'txp_section',
'txp_token',
'txp_users'
);
# find tables that match the TXP table prefix, mark standard and selected ones.
$mytables = ps('mytables', array()) ? ps('mytables') : array();
$unknown = FALSE;
if ($rs = safe_query("SHOW TABLES LIKE '".addcslashes(doSlash(PFX), '%_')."%'"))
{
while ($row = mysqli_fetch_row($rs))
{
$table = substr($row[0], strlen(PFX));
if (in_array($table, $txptables) or in_array($table, $mytables))
{
$tables[] = $table;
}
else
{
$unknown = TRUE;
}
$inputs[] =
'';
}
}
else
{
echo graf('Hmmm... strange, I cannot find any tables that match your TXP table prefix');
}
# ask user what to do with tables we're not sure about.
if ($unknown and !ps('continue'))
{
echo
graf('Below, a list of tables is shown that may be part of your Textpattern install.').
graf('Some of these tables may have been created by plugins you have installed.
In that case, check the tables that these plugins have added.
Be careful not to check tables that are not related to this Textpattern install!').
graf('The standard Textpattern tables are already checked.').
form(
join(' ', $inputs).
graf(
eInput('rvm_latin1_to_utf8').' '.
fInput('submit', 'continue', 'Continue', 'publish')
)
);
rvm_latin1_to_utf8_end();
}
echo graf('Updating Textpattern tables...');
# loop through tables
foreach ($tables as $table)
{
$indexes = array();
$drop = array();
$create = array();
$bin = array();
$utf8 = array();
$cols = array();
# prepare alter statements for columns
$columns = getRows('SHOW COLUMNS FROM '.safe_pfx($table));
if ($columns) foreach ($columns as $column)
{
extract($column);
if (preg_match('/^(char|varchar|tinytext|text|mediumtext|longtext|enum|set)\b/', $Type))
{
$Null = ($Null == 'YES' ? ' NULL' : ' NOT NULL');
$begin = 'MODIFY `'.doSlash($Field).'` '.$Type.' CHARACTER SET ';
$end = $Null." DEFAULT '".doSlash($Default)."'";
$bin[] = $begin.'binary'.$end;
$utf8[] = $begin.'utf8 COLLATE utf8_general_ci'.$end;
$cols[] = $Field;
}
}
# analyse index structures
$rows = getRows('SHOW INDEX FROM '.safe_pfx($table));
if ($rows) foreach ($rows as $row)
{
extract($row);
if ($Key_name == 'PRIMARY' or !in_array($Column_name, $cols))
{
continue;
}
$Key_name = '`'.$Key_name.'`';
$indexes[$Key_name]['unique'] = ($Non_unique ? '' : ' UNIQUE');
$indexes[$Key_name]['fulltext'] = ($Index_type == 'FULLTEXT' ? ' FULLTEXT' : '');
$indexes[$Key_name]['columns'][$Seq_in_index] = '`'.$Column_name.'`'.($Sub_part ? ' ('.$Sub_part.')' : '');
}
# prepare index drop/create statements
foreach ($indexes as $index => $structure)
{
extract($structure);
$drop[] = 'DROP INDEX '.$index;
$create[] = 'ADD'.$unique.$fulltext.' INDEX '.$index.' ('.join(', ', $columns).')';
}
$success = TRUE;
# alter the table + indexes
if ($drop)
{
$success *= safe_alter($table, join(', ', $drop), $debug);
}
if ($bin and $success)
{
$success *= safe_alter($table, join(', ', $bin), $debug);
if ($success)
{
$success *= safe_alter($table, join(', ', $utf8).', DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci', $debug);
}
}
if ($create and $success)
{
$success *= safe_alter($table, join(', ', $create), $debug);
}
}
# while we're here, might as well optimize the tables
safe_query("OPTIMIZE TABLE ".join(', ', array_map('safe_pfx', $tables)), $debug);
if ($success)
{
echo graf('Successfully completed.');
$file = txpath.'/config.php';
$config = file_get_contents($file);
$config = str_replace('?>', n.'# rvm_latin1_to_utf8 plugin added the following line to ensure a correct dbcharset'.n.'$txpcfg[\'dbcharset\'] = \'utf8\''.n.'?>', $config);
if ($txpcfg['dbcharset'] == 'utf8'
or is_writable($file)
and $handle = fopen($file, 'w')
and fwrite($handle, $config) !== FALSE
and fclose($handle))
{
echo graf('The rvm_latin1_to_utf8 plugin has been automatically de-installed.');
safe_delete('txp_plugin', "name = 'rvm_latin1_to_utf8'");
}
else
{
echo graf('WARNING: textpattern/config.php could not be updated automatically. Please update your textpattern/config.php file to contain $txpcfg[\'dbcharset\'] = \'utf8\'; and deinstall the rvm_latin1_to_utf8 plugin manually');
}
}
else
{
echo graf('Due to one or more errors the tables could not be updated correctly. You may have to restore a backup.');
}
rvm_latin1_to_utf8_end();
}
function rvm_latin1_to_utf8_end()
{
echo '';
end_page();
exit();
}
# --- END PLUGIN CODE ---
?>