You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ryzom-core/tool/translation_repair/translation_repair.py

256 lines
10 KiB
Python

#
# Copyright (C) 2019-2020 Jan BOON <jan.boon@kaetemi.be>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# This script attempts to repair bad hashes in translation files
import os
# Root directory of the translation data; the "translated" and "diff"
# sub-directories used by the functions below are resolved relative to it.
path = "R:\\leveldesign\\translation"
def processUxt(wk, tl):
    """Restore the hash comments of a translated file from its working copy.

    Pass 1 reads ``wk`` and records, for every entry label, the most recent
    ``// INDEX`` and ``// HASH_VALUE`` comment lines seen before it.

    Pass 2 rewrites ``tl``: every existing ``// HASH_VALUE`` / ``// INDEX``
    line is dropped, and in front of each label that is known from ``wk`` a
    fresh ``// INDEX n`` line and the hash line taken from ``wk`` are written.
    ``n`` counts up from 0 in the order in which distinct ``wk`` index lines
    are first encountered in ``tl``. All other lines are copied unchanged.

    Both files are read and written as UTF-8 explicitly, so the BOM handling
    does not depend on the platform's default encoding. The output always
    starts with a BOM and replaces ``tl`` via the temporary ``tl + ".new"``.

    Args:
        wk: Path of the reference (working copy) file providing the hashes.
        tl: Path of the translated file to repair in place.
    """
    print(tl)
    markerPrefixes = ("// HASH_VALUE", "// INDEX")
    # A first token starting with one of these is a string or a comment,
    # not an entry label.
    nonLabelPrefixes = ("[", "//", "/*")

    # Pass 1: label -> preceding index/hash comment lines of the working copy.
    inComment = 0
    inString = 0
    lastIndex = ""
    lastHash = ""
    wkMap = {}
    with open(wk, "r", encoding="utf-8") as f:
        for l in f:
            if l.startswith("\ufeff"):
                l = l[1:]
            if not inString and not inComment:
                s = l.split()
                if s and not s[0].startswith(nonLabelPrefixes):
                    wkMap[s[0]] = {"index": lastIndex, "hash": lastHash}
            if l.startswith(markerPrefixes):
                # A marker line always begins a new entry: resynchronise the
                # nesting counters in case earlier brackets were unbalanced.
                inString = 0
                inComment = 0
                if l.startswith("// HASH_VALUE"):
                    lastHash = l.rstrip()
                else:
                    lastIndex = l.rstrip()
                continue
            # Net count of unescaped "[" / "]" and of "/*" / "*/" on this line.
            inString += l.count("[") - l.count("\\[") - l.count("]") + l.count("\\]")
            inComment += l.count("/*") - l.count("*/")

    # Pass 2: rewrite the translated file with repaired marker lines.
    inComment = 0
    inString = 0
    reIndex = {}
    atIndex = 0
    with open(tl, "r", encoding="utf-8") as fr, \
            open(tl + ".new", "w", encoding="utf-8") as fw:
        fw.write("\ufeff")
        for l in fr:
            if l.startswith("\ufeff"):
                l = l[1:]
            if not inString and not inComment:
                s = l.split()
                if s and not s[0].startswith(nonLabelPrefixes) and s[0] in wkMap:
                    entry = wkMap[s[0]]
                    if entry["index"] not in reIndex:
                        reIndex[entry["index"]] = "// INDEX " + str(atIndex)
                        atIndex += 1
                    fw.write(reIndex[entry["index"]] + "\n")
                    fw.write(entry["hash"] + "\n")
            if l.startswith(markerPrefixes):
                # Old marker lines are discarded; repaired ones are emitted
                # right before the label they belong to.
                inString = 0
                inComment = 0
                continue
            fw.write(l)
            inString += l.count("[") - l.count("\\[") - l.count("]") + l.count("\\]")
            inComment += l.count("/*") - l.count("*/")
    # Swap only after both handles are closed.
    os.replace(tl + ".new", tl)
#processUxt(path + "\\translated\\wk.uxt", path + "\\translated\\en.uxt")
#processUxt(path + "\\translated\\wk.uxt", path + "\\translated\\es.uxt")
#processUxt(path + "\\translated\\wk.uxt", path + "\\translated\\fr.uxt")
#processUxt(path + "\\translated\\wk.uxt", path + "\\translated\\ru.uxt")
#processUxt(path + "\\translated\\wk.uxt", path + "\\translated\\de.uxt")
#processUxt(path + "\\translated\\r2_wk.uxt", path + "\\translated\\r2_en.uxt")
#processUxt(path + "\\translated\\r2_wk.uxt", path + "\\translated\\r2_es.uxt")
#processUxt(path + "\\translated\\r2_wk.uxt", path + "\\translated\\r2_fr.uxt")
#processUxt(path + "\\translated\\r2_wk.uxt", path + "\\translated\\r2_ru.uxt")
#processUxt(path + "\\translated\\r2_wk.uxt", path + "\\translated\\r2_de.uxt")
#processUxt(path + "\\translated\\clause_wk.txt", path + "\\translated\\clause_en.txt")
#processUxt(path + "\\translated\\clause_wk.txt", path + "\\translated\\clause_es.txt")
#processUxt(path + "\\translated\\clause_wk.txt", path + "\\translated\\clause_fr.txt")
#processUxt(path + "\\translated\\clause_wk.txt", path + "\\translated\\clause_ru.txt")
#processUxt(path + "\\translated\\clause_wk.txt", path + "\\translated\\clause_de.txt")
def processPhrase(wk, tl):
    """Restore the ``// HASH_VALUE`` comments of a phrase file from a reference.

    Pass 1 reads ``wk`` and maps each phrase name to the ``// HASH_VALUE``
    line directly preceding it; a hash line is consumed by the first name
    that follows it, so later tokens are not mistaken for phrase names.

    Pass 2 rewrites ``tl``: every existing ``// HASH_VALUE`` line is dropped
    and the hash line from ``wk`` is written in front of each phrase name
    found in the map. All other lines are copied unchanged.

    Both files are read and written as UTF-8 explicitly, so the BOM handling
    does not depend on the platform's default encoding. The output always
    starts with a BOM and replaces ``tl`` via the temporary ``tl + ".new"``.

    Args:
        wk: Path of the reference phrase file providing the hashes.
        tl: Path of the translated phrase file to repair in place.
    """
    print(tl)
    # A first token starting with one of these is a string, a comment or a
    # block opener, not a phrase name.
    nonNamePrefixes = ("[", "//", "/*", "{")

    # Pass 1: phrase name -> hash comment line of the reference file.
    inComment = 0
    inString = 0
    lastHash = ""
    wkMap = {}
    with open(wk, "r", encoding="utf-8") as f:
        for l in f:
            if l.startswith("\ufeff"):
                l = l[1:]
            if not inString and not inComment:
                s = l.split()
                if s and not s[0].startswith(nonNamePrefixes) and lastHash:
                    wkMap[s[0]] = {"hash": lastHash}
                    # Consume the hash so only the next name receives it.
                    lastHash = ""
            if l.startswith("// HASH_VALUE"):
                # A hash line always begins a new phrase: resynchronise the
                # nesting counters in case earlier brackets were unbalanced.
                inString = 0
                inComment = 0
                lastHash = l.rstrip()
                continue
            # Net count of unescaped "[" / "]" and of "/*" / "*/" on this line.
            inString += l.count("[") - l.count("\\[") - l.count("]") + l.count("\\]")
            inComment += l.count("/*") - l.count("*/")

    # Pass 2: rewrite the translated file with the reference hashes.
    inComment = 0
    inString = 0
    with open(tl, "r", encoding="utf-8") as fr, \
            open(tl + ".new", "w", encoding="utf-8") as fw:
        fw.write("\ufeff")
        for l in fr:
            if l.startswith("\ufeff"):
                l = l[1:]
            if not inString and not inComment:
                s = l.split()
                if s and not s[0].startswith(nonNamePrefixes) and s[0] in wkMap:
                    fw.write(wkMap[s[0]]["hash"] + "\n")
            if l.startswith("// HASH_VALUE"):
                # Old hash lines are discarded.
                inString = 0
                inComment = 0
                continue
            fw.write(l)
            inString += l.count("[") - l.count("\\[") - l.count("]") + l.count("\\]")
            inComment += l.count("/*") - l.count("*/")
    # Swap only after both handles are closed.
    os.replace(tl + ".new", tl)
##processPhrase(path + "\\translated\\phrase_en.txt", path + "\\translated\\phrase_en.txt")
#processPhrase(path + "\\translated\\phrase_en.txt", path + "\\translated\\phrase_es.txt")
#processPhrase(path + "\\translated\\phrase_en.txt", path + "\\translated\\phrase_fr.txt")
#processPhrase(path + "\\translated\\phrase_en.txt", path + "\\translated\\phrase_ru.txt")
#processPhrase(path + "\\translated\\phrase_en.txt", path + "\\translated\\phrase_de.txt")
#processPhrase(path + "\\translated\\phrase_en.txt", path + "\\translated\\phrase_pt.txt")
def processWords(ck, wk, tl):
    """Copy first-column hashes from a reference words sheet into ``tl``.

    Does nothing unless the file ``ck`` exists; ``ck`` is only tested for
    existence and never read.

    Rows are tab-separated. From ``wk`` every row whose first column is at
    least 16 characters long contributes ``second column -> first column``.
    In ``tl`` the first column of each row with at least two columns is then
    replaced: by the value from ``wk`` when the second column is known, and
    by ``"0000000000000000"`` otherwise, except for the header row whose
    first column is ``*HASH_VALUE``. Columns are compared verbatim (a
    trailing newline on the last column is part of the value).

    Both files are read and written as UTF-8 explicitly, so the BOM handling
    does not depend on the platform's default encoding. The output always
    starts with a BOM and replaces ``tl`` via the temporary ``tl + ".new"``.

    NOTE(review): processWordsDiff resets unknown rows to
    ``"_0000000000000000"`` (leading underscore) while this function writes
    no underscore - confirm that the difference is intended.

    Args:
        ck: Path whose existence enables the repair.
        wk: Path of the reference words sheet providing the hashes.
        tl: Path of the words sheet to repair in place.
    """
    if not os.path.isfile(ck):
        return
    print(tl)

    # Key (second column) -> hash (first column) of the reference sheet.
    wkMap = {}
    with open(wk, "r", encoding="utf-8") as f:
        for l in f:
            if l.startswith("\ufeff"):
                l = l[1:]
            s = l.split("\t")
            if len(s) > 1 and len(s[0]) >= 16:
                wkMap[s[1]] = s[0]

    with open(tl, "r", encoding="utf-8") as fr, \
            open(tl + ".new", "w", encoding="utf-8") as fw:
        fw.write("\ufeff")
        for l in fr:
            if l.startswith("\ufeff"):
                l = l[1:]
            s = l.split("\t")
            if len(s) > 1:
                rest = l[len(s[0]):]  # everything after the first column
                if s[1] in wkMap:
                    l = wkMap[s[1]] + rest
                elif s[0] != "*HASH_VALUE":
                    l = "0000000000000000" + rest
            fw.write(l)
    # Swap only after both handles are closed.
    os.replace(tl + ".new", tl)
def processWordsDiff(wk, tl):
    """Copy hashes from a diff sheet into the words sheet ``tl``.

    Does nothing when the file ``wk`` does not exist.

    Rows are tab-separated. From ``wk`` every row with at least three
    columns whose second column is at least 16 characters long contributes
    ``third column -> second column``. In ``tl`` the first column of each
    row with at least two columns is then replaced: by the value from ``wk``
    when the second column is known; otherwise by ``"_0000000000000000"``
    when the current first column is shorter than 16 characters and is not
    the ``*HASH_VALUE`` header. Columns are compared verbatim (a trailing
    newline on the last column is part of the value).

    Both files are read and written as UTF-8 explicitly, so the BOM handling
    does not depend on the platform's default encoding. The output always
    starts with a BOM and replaces ``tl`` via the temporary ``tl + ".new"``.

    Args:
        wk: Path of the diff sheet providing the hashes.
        tl: Path of the words sheet to repair in place.
    """
    if not os.path.isfile(wk):
        return
    print(tl)

    # Key (third column) -> hash (second column) of the diff sheet.
    wkMap = {}
    with open(wk, "r", encoding="utf-8") as f:
        for l in f:
            if l.startswith("\ufeff"):
                l = l[1:]
            s = l.split("\t")
            if len(s) > 2 and len(s[1]) >= 16:
                wkMap[s[2]] = s[1]

    with open(tl, "r", encoding="utf-8") as fr, \
            open(tl + ".new", "w", encoding="utf-8") as fw:
        fw.write("\ufeff")
        for l in fr:
            if l.startswith("\ufeff"):
                l = l[1:]
            s = l.split("\t")
            if len(s) > 1:
                rest = l[len(s[0]):]  # everything after the first column
                if s[1] in wkMap:
                    l = wkMap[s[1]] + rest
                elif s[0] != "*HASH_VALUE" and len(s[0]) < 16:
                    # Too short to be a hash: reset it to the placeholder.
                    l = "_0000000000000000" + rest
            fw.write(l)
    # Swap only after both handles are closed.
    os.replace(tl + ".new", tl)
def processWordsAll(name, diffStamp="60C8831C"):
    """Repair the words sheets called ``name`` for every supported language.

    The English sheet is repaired first from its own diff file with
    processWordsDiff. The es, fr, ru and de sheets are then repaired with
    processWords, using the (already repaired) English sheet as reference;
    each language is only processed when its diff file exists.

    Args:
        name: Base name of the sheet, e.g. ``"item_words"``.
        diffStamp: Identifier embedded in the diff file names
            (``<name>_<lang>_diff_<diffStamp>.txt``). Defaults to the stamp
            this script was last run with; pass another value to process a
            different batch of diff files.
    """
    translatedBase = path + "\\translated\\" + name
    diffBase = path + "\\diff\\" + name
    enSheet = translatedBase + "_en.txt"

    # English must come first: the other languages take their hashes from it.
    processWordsDiff(diffBase + "_en_diff_" + diffStamp + ".txt", enSheet)
    for lang in ("es", "fr", "ru", "de"):
        processWords(
            diffBase + "_" + lang + "_diff_" + diffStamp + ".txt",
            enSheet,
            translatedBase + "_" + lang + ".txt",
        )
# Repair every words sheet, in this order. Entries that are commented out
# are deliberately skipped.
for _wordsSheet in (
    "bodypart_words",
    "career_words",
    "characteristic_words",
    "classificationtype_words",
    "creature_words",
    "damagetype_words",
    "ecosystem_words",
    "faction_words",
    "item_words",
    "job_words",
    "outpost_words",
    "place_words",
    "powertype_words",
    "race_words",
    "sbrick_words",
    "score_words",
    "skill_words",
    "sphrase_words",
    "title_words",
    # "damage_words",
):
    processWordsAll(_wordsSheet)