最近发现文档编码格式不统一,给协同工作带来了很多麻烦。由于 Python 3 支持的编码格式较多,这里用 Python 3 编写了一个简单的编码转换工具(将 GBK 编码的文件转换为 UTF-8),并在此简单记录一下。
原理很简单,使用一个编码读取文件,然后使用另一个编码写入文件。
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
"""Convert a GBK-encoded text file to UTF-8 in place.

Usage: pass the path of the file to convert as the only argument.
Files that already decode as UTF-8 are left untouched.
"""
from sys import argv
import sys


def check_file_encode(file_name, test_encode):
    """Report whether the whole file decodes cleanly with ``test_encode``.

    Args:
        file_name: Path of the file to probe.
        test_encode: Name of the codec to try (e.g. ``"utf8"``, ``"gbk"``).

    Returns:
        1 if every byte of the file decodes with ``test_encode``, else 0.

    Raises:
        OSError: If the file cannot be opened or read.
        LookupError: If ``test_encode`` is not a known codec name.
    """
    # Read raw bytes so the handle is always closed by the context manager
    # and decoding is a pure in-memory check.
    with open(file_name, "rb") as target_file:
        raw = target_file.read()
    try:
        raw.decode(test_encode)
    except UnicodeDecodeError:
        return 0
    return 1


def convert_file_encode(file_name, old_encode, new_encode):
    """Re-encode a file in place from ``old_encode`` to ``new_encode``.

    The file is processed as bytes, so line endings are preserved exactly;
    only the character encoding changes.

    Args:
        file_name: Path of the file to rewrite.
        old_encode: Codec the file is currently stored in.
        new_encode: Codec the file should be rewritten in.

    Returns:
        None. If the file does not decode with ``old_encode`` a message is
        printed and the file is left unchanged.

    Raises:
        UnicodeEncodeError: If the text cannot be represented in
            ``new_encode``. The file is left unchanged in that case.
        OSError: If the file cannot be read or written.
    """
    with open(file_name, "rb") as old_file:
        raw = old_file.read()

    try:
        content = raw.decode(old_encode)
    except UnicodeDecodeError:
        print("%s in %s read fails" % (file_name, old_encode))
        return

    # Encode BEFORE opening for writing: opening with "w" truncates the file,
    # so an encoding failure after that point would destroy the original data.
    encoded = content.encode(new_encode)

    with open(file_name, "wb") as new_file:
        new_file.write(encoded)


def main(args):
    """Run the GBK -> UTF-8 conversion for the file named in ``args``.

    Args:
        args: Command-line argument list in ``sys.argv`` layout
            (program name followed by exactly one file path).

    Returns:
        Process exit status: 2 for a usage error, otherwise 0.
    """
    if len(args) != 2:
        prog = args[0] if args else "convert"
        print("Usage: %s FILE" % prog, file=sys.stderr)
        return 2

    file_name = args[1]

    # Nothing to do when the file is already valid UTF-8.
    if check_file_encode(file_name, "utf8") == 1:
        print("%s is UTF8. Return" % (file_name))
        return 0

    if check_file_encode(file_name, "gbk") == 0:
        print("%s is NOT gbk. Cannot Convert" % (file_name))
        return 0

    convert_file_encode(file_name, "gbk", "utf8")

    # Verify the result by probing the rewritten file again.
    if check_file_encode(file_name, "utf8") == 1:
        print("%s Convert Success" % (file_name))
    else:
        print("%s Convert Fails" % (file_name))
    return 0


if __name__ == "__main__":
    sys.exit(main(argv))
python支持的编码如下:
【 https://docs.python.org/3/library/codecs.html#standard-encodings 】
发表评论