update: 2026-03-28 20:59
This commit is contained in:
@@ -0,0 +1,105 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
计算机英语词汇数据转换脚本
|
||||
将txt格式的词汇文件转换为JavaScript数组格式
|
||||
"""
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
# Section-heading lines of the source text; they carry no vocabulary entry.
_SECTION_HEADERS = ('51 - 100', '101 - 150', '151 - 200')

# Part-of-speech tags recognised when repairing an entry that has no
# properly placed closing bracket.
_KNOWN_WORD_TYPES = frozenset(
    ('n.', 'v.', 'vt.', 'vi.', 'a.', 'ad.', 'prep.', 'conj.', 'pron.')
)

# Well-formed entry: "<id>. <word> [<phonetic>] <type> <meaning>".
# The phonetic group stops at the first "]", so it can neither include the
# bracket itself nor run on into the part of speech / meaning.
_STRICT_ENTRY_RE = re.compile(
    r"^(\d+)\. ([a-zA-Z\-']+) \[([^\]]*)\] ([a-zA-Z.&]+) (.+)$"
)

# Fallback for entries whose closing bracket is missing or not followed by
# exactly one space.  The phonetic group is greedy here, which is why the
# result is repaired afterwards.
_LOOSE_ENTRY_RE = re.compile(
    r"^(\d+)\. ([a-zA-Z\-']+) \[(.*) ([a-zA-Z.&]+) (.+)$"
)

# One whitespace-delimited token.
_TOKEN_RE = re.compile(r'\S+')


def _parse_entry_line(line):
    """Parse one stripped, non-empty line into a vocabulary dict.

    Returns None when the line does not look like a vocabulary entry.
    """
    match = _STRICT_ENTRY_RE.match(line)
    if match:
        word_id, word, phonetic, word_type, meaning = match.groups()
    else:
        match = _LOOSE_ENTRY_RE.match(line)
        if not match:
            return None
        word_id, word, phonetic, word_type, meaning = match.groups()

        if not phonetic.endswith(']'):
            # The greedy group may have swallowed the real part of speech.
            # Re-split at the first known tag that appears as a whole token
            # after the opening bracket.
            phonetic_start = line.find('[') + 1
            for token in _TOKEN_RE.finditer(line, phonetic_start):
                if token.group() in _KNOWN_WORD_TYPES:
                    word_type = token.group()
                    phonetic = line[phonetic_start:token.start()]
                    # Normalise runs of whitespace inside the meaning.
                    meaning = ' '.join(line[token.end():].split())
                    break

        # Drop a stray closing bracket (and anything after it) so that the
        # brackets added below are not doubled.
        phonetic = phonetic.partition(']')[0]

    return {
        'id': int(word_id),
        'word': word.strip(),
        'phonetic': f'[{phonetic.strip()}]',
        'type': word_type.strip(),
        'meaning': meaning.strip(),
    }


def parse_vocabulary_file(file_path):
    """Parse a vocabulary text file into a list of entry dicts.

    Each entry line is expected to look like
    ``<id>. <word> [<phonetic>] <type> <meaning>``.  Blank lines, the
    section-heading lines ("51 - 100", "101 - 150", "151 - 200") and lines
    that do not match the entry format are skipped.

    Args:
        file_path: Path of the UTF-8 encoded text file to read.

    Returns:
        A list of dicts with the keys ``id`` (int), ``word``, ``phonetic``
        (wrapped in exactly one pair of square brackets), ``type`` and
        ``meaning``, in file order.

    Raises:
        OSError: If the file cannot be opened (e.g. FileNotFoundError).
    """
    vocabulary_list = []

    with open(file_path, 'r', encoding='utf-8') as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line or line.startswith(_SECTION_HEADERS):
                continue

            entry = _parse_entry_line(line)
            if entry is not None:
                vocabulary_list.append(entry)

    return vocabulary_list
|
||||
|
||||
def _js_string(value):
    """Return *value* as a double-quoted, properly escaped JS string literal."""
    # A JSON string is a valid JavaScript string literal; ensure_ascii=False
    # keeps Chinese text and phonetic symbols readable in the output.
    return json.dumps(str(value), ensure_ascii=False)


def _format_js_entry(item):
    """Render one vocabulary dict as a JavaScript object literal line."""
    return (
        f' {{ id: {item["id"]}, '
        f'word: {_js_string(item["word"])}, '
        f'phonetic: {_js_string(item["phonetic"])}, '
        f'type: {_js_string(item["type"])}, '
        f'meaning: {_js_string(item["meaning"])} }}'
    )


def generate_js_file(vocabulary_list, output_path):
    """Write the vocabulary entries to a JavaScript data file.

    The file declares ``const vocabularyData = [...]`` with one object per
    entry and exports it through ``module.exports`` when that is available.
    String fields are escaped, so quotes or backslashes in the data cannot
    produce invalid JavaScript.

    Args:
        vocabulary_list: Sequence of dicts with the keys ``id``, ``word``,
            ``phonetic``, ``type`` and ``meaning``.
        output_path: Path of the JavaScript file to create or overwrite.

    Raises:
        OSError: If the output file cannot be written.
    """
    header = '// 计算机英语词汇数据\nconst vocabularyData = [\n'
    footer = (
        '];\n'
        '\n'
        '// 导出数据供其他文件使用\n'
        "if (typeof module !== 'undefined' && module.exports) {\n"
        'module.exports = vocabularyData;\n'
        '}\n'
    )

    entry_lines = [_format_js_entry(item) for item in vocabulary_list]
    # Entries are comma-separated; an empty list produces no body at all.
    body = ',\n'.join(entry_lines) + '\n' if entry_lines else ''

    with open(output_path, 'w', encoding='utf-8') as file:
        file.write(header + body + footer)

    print(f'成功生成JavaScript文件: {output_path}')
    print(f'共转换 {len(vocabulary_list)} 个词汇')
|
||||
|
||||
def main():
    """Convert the vocabulary text file into a JavaScript data file.

    Reads '计算机英语词汇.txt', writes 'vocabulary-data.js' and prints
    progress messages.  Any failure is reported on stdout instead of being
    raised.
    """
    source_name = '计算机英语词汇.txt'
    target_name = 'vocabulary-data.js'

    try:
        print('开始解析词汇文件...')
        entries = parse_vocabulary_file(source_name)

        print('开始生成JavaScript文件...')
        generate_js_file(entries, target_name)

        print('转换完成!')
    except Exception as error:
        # A missing file gets its own message; everything else is reported
        # generically with the exception text.
        if isinstance(error, FileNotFoundError):
            print(f'错误: 找不到文件 {source_name}')
        else:
            print(f'转换过程中发生错误: {error}')
|
||||
|
||||
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user