好记性不如铅笔头

python && jython, 编程

利用python重新编码文件

最近发现文档编码格式不统一,给协同工作带来了很多麻烦。由于python3支持的编码格式较多,这里利用python3编写了一个简单的编码转换工具,在此简单地记录一下。
原理很简单,使用一个编码读取文件,然后使用另一个编码写入文件。

#!/usr/bin/python3
# -*- coding: UTF-8 -*- 

from sys import argv
import sys

def check_file_encode(file_name, test_encode):
	"""Report whether the file decodes completely as *test_encode*.

	The whole file is read once in text mode using *test_encode*; the
	decoded text itself is discarded, only success or failure matters.

	Args:
		file_name: Path of the file to probe.
		test_encode: Codec name to try, e.g. "utf8" or "gbk".

	Returns:
		1 if the entire file decodes without error, 0 if decoding raises
		UnicodeDecodeError.

	Raises:
		Anything raised by open() itself (for example a missing file) is
		not caught here and propagates to the caller.
	"""
	# The with-block closes the handle on every exit path, including
	# exceptions other than UnicodeDecodeError raised while reading.
	with open(file_name, "r", encoding=test_encode) as target_file:
		try:
			target_file.read()
		except UnicodeDecodeError:
			return 0
	return 1

def convert_file_encode(file_name, old_encode, new_encode):
	"""Rewrite *file_name* in place, converting old_encode to new_encode.

	The file is read fully as text using *old_encode* and then written
	back to the same path using *new_encode*.

	Args:
		file_name: Path of the file to convert; it is overwritten.
		old_encode: Codec name used to decode the current content.
		new_encode: Codec name used to encode the rewritten content.

	Returns:
		None. If the file cannot be decoded as *old_encode*, a message is
		printed and the file is left untouched.

	Raises:
		UnicodeEncodeError: If the text cannot be represented in
			*new_encode*. The check runs before the file is reopened for
			writing, so the original content is still intact.
	"""
	try:
		with open(file_name, "r", encoding=old_encode) as old_file:
			content = old_file.read()
	except UnicodeDecodeError:
		print("%s in %s read fails"%(file_name, old_encode))
		return

	# Opening with "w" truncates the file immediately. Encode once up
	# front so an unencodable character or an unknown codec name fails
	# here, before the only copy of the data has been destroyed.
	content.encode(new_encode)

	with open(file_name, "w", encoding=new_encode) as new_file:
		new_file.write(content)
	return

if __name__ == "__main__":
	# Command-line entry point: convert one GBK-encoded file to UTF-8 in
	# place. Files that already decode as UTF-8 are left alone, and files
	# that do not decode as GBK are reported and left alone.
	if len(argv) != 2:
		# Tuple-unpacking argv directly would die with a ValueError
		# traceback on a wrong argument count; show usage instead.
		print("Usage: %s <file_name>"%(argv[0]), file=sys.stderr)
		sys.exit(2)
	file_name = argv[1]

	if check_file_encode(file_name, "utf8") == 1:
		print("%s is UTF8. Return"%(file_name))
		sys.exit()

	if check_file_encode(file_name, "gbk") == 0:
		print("%s is NOT gbk. Cannot Convert"%(file_name))
		sys.exit()

	convert_file_encode(file_name, "gbk", "utf8")

	# Re-probe the rewritten file to confirm the conversion took effect.
	if check_file_encode(file_name, "utf8") == 1:
		print("%s Convert Success"%(file_name))
	else:
		print("%s Convert Fails"%(file_name))

python支持的编码如下:
https://docs.python.org/3/library/codecs.html#standard-encodings

Leave a Reply

18 − 10 =

此站点使用Akismet来减少垃圾评论。了解我们如何处理您的评论数据