Projects/Summer of Code/2007/Projects/KAider: Difference between revisions

From KDE TechBase
m (xml markup properly highlighted)
Line 79: Line 79:
This is how a TBX glossary looks like:
This is how a TBX glossary looks like:


    <?xml version="1.0" encoding="UTF-8"?>
<code xml>
    <!DOCTYPE martif PUBLIC "ISO 12200:1999A//DTD MARTIF core (DXFcdV04)//EN" "TBXcdv04.dtd">
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE martif PUBLIC "ISO 12200:1999A//DTD MARTIF core (DXFcdV04)//EN" "TBXcdv04.dtd">
      
      
    <martif type="TBX" xml:lang="en">
<martif type="TBX" xml:lang="en">
        <martifHeader>
    <martifHeader>
            <fileDesc>
        <fileDesc>
                <titleStmt>
            <titleStmt>
                    <title>KDE Russian Team Glossary</title>
                <title>KDE Russian Team Glossary</title>
                </titleStmt>
            </titleStmt>
            </fileDesc>
        </fileDesc>
        </martifHeader>
    </martifHeader>
        <text>
    <text>
            <body>
        <body>
                <termEntry id="1">
            <termEntry id="1">
                    <descrip type="subjectField">Security</descrip>
                <descrip type="subjectField">Security</descrip>
                    <descrip type="relatedConcept">authorization</descrip>
                <descrip type="relatedConcept">authorization</descrip>
                    <langSet xml:lang="en">
                <langSet xml:lang="en">
                        <ntig>
                    <ntig>
                            <termGrp>
                        <termGrp>
                                <term>authentication</term>
                            <term>authentication</term>
                                <termNote type="partOfSpeech">noun</termNote>
                            <termNote type="partOfSpeech">noun</termNote>
                            </termGrp>
                        </termGrp>
                            <descrip type="context">.</descrip>
                        <descrip type="context">.</descrip>
                        </ntig>
                    </ntig>
                    </langSet>
                </langSet>
                    <langSet xml:lang="ru">
                <langSet xml:lang="ru">
                        <ntig>
                    <ntig>
                            <termGrp>
                        <termGrp>
                                <term>идентификация</term>
                            <term>идентификация</term>
                                <termNote type="partOfSpeech">сущ.</termNote>
                            <termNote type="partOfSpeech">сущ.</termNote>
                            </termGrp>
                        </termGrp>
                        </ntig>
                    </ntig>
                    </langSet>
                </langSet>
                    <langSet xml:lang="ru">
                <langSet xml:lang="ru">
                        <ntig>
                    <ntig>
                            <termGrp>
                        <termGrp>
                                <term>подтверждение подлинности</term>
                            <term>подтверждение подлинности</term>
                                <termNote type="partOfSpeech">сущ.</termNote>
                            <termNote type="partOfSpeech">сущ.</termNote>
                            </termGrp>
                        </termGrp>
                        </ntig>
                    </ntig>
                    </langSet>
                </langSet>
                </termEntry>
            </termEntry>
                <termEntry id="2">
            <termEntry id="2">
                    <descrip type="subjectField">Security</descrip>
                <descrip type="subjectField">Security</descrip>
                    <descrip type="relatedConcept">authentication</descrip>
                <descrip type="relatedConcept">authentication</descrip>
                    <langSet xml:lang="en">
                <langSet xml:lang="en">
                        <ntig>
                    <ntig>
                            <termGrp>
                        <termGrp>
                                <term>authorization</term>
                            <term>authorization</term>
                                <termNote type="partOfSpeech">noun</termNote>
                            <termNote type="partOfSpeech">noun</termNote>
                            </termGrp>
                        </termGrp>
                            <descrip type="context"></descrip>
                        <descrip type="context"></descrip>
                        </ntig>
                    </ntig>
                    </langSet>
                </langSet>
                    <langSet xml:lang="ru">
                <langSet xml:lang="ru">
                        <ntig>
                    <ntig>
                            <termGrp>
                        <termGrp>
                                <term>определение прав</term>
                            <term>определение прав</term>
                                <termNote type="partOfSpeech">сущ.</termNote>
                            <termNote type="partOfSpeech">сущ.</termNote>
                            </termGrp>
                        </termGrp>
                        </ntig>
                    </ntig>
                    </langSet>
                </langSet>
                </termEntry>
            </termEntry>
            </body>
        </body>
        </text>
    </text>
    </martif>
</martif>
</code>

Revision as of 21:07, 12 July 2007

KAider is a computer-aided translation system that focuses on productivity and performance (nojava!). The translator does only the creative work (delivering the message in his/her mother language in a laconic and easy-to-understand form). KAider implies a paragraph-by-paragraph translation approach (when translating documentation) and a message-by-message approach (when translating GUI). See KAider/Introduction

Current state

Already has:

  • syntax highlighting
  • spellcheck (problems with dividing filter: doesn't check the last word)
  • search-n-replace, ignoring accel marks
  • small features like quick tag insert, placing text cursor right after the tag in the beginning (e.g. '<qt>|foobar</qt>' where "|" is a cursor)
  • entry bookmarks
  • viewer of the difference between the current msgid and the previous one (i.e. the msgid that the current msgstr is actually a translation of -- for fuzzies generated with the --previous gettext option)
  • merge mode for editors (QA) or when several translators work on the same file screenshot
  • basic projectmanager functionality screenshot
  • glossary with basic tbx format support. KAider displays relevant entries on-the-fly and provides shortcuts to insert them. also, you can add new glossary terms via context menu of the glossary screenshot
  • webquery view, flexible thanks to kross

Compiling

After you set kde env up (compiling kdelibs is enough):

cd trunk
svn up playground/devtools/kaider
su kde-devel
mkdir playground/devtools/kaider/build
cd playground/devtools/kaider/build
cmakekde ..

As root, run sshd, and then from your usual shell:

ssh -XC kde-devel@localhost
kaider

Roadmap

  • [basic framework DONE] project management -- 1-2 weeks
  • [DONE] context glossary -- 0.5-1 week
  • translation DB (QtSql) -- 2 weeks
  • [DONE] mode for merging translations for editors (QA) -- 1 week
  • scripting API + sipping on google translate for live glossary (kross) - 2 weeks
  • the remaining time is for perfection/polishing/small improvements and xliff+qt-linguist support

What i'm doing these days

  • kross WebQuery framework - testing, final refinements
  • improvements on ProjectView (dbus, etc), Glossary

Ideas

Current:

  • Webquery scripts for close languages;
  • project-wise and program-wise: webquery scripts, glossaries, TMs

Further work:

  • Research on dividing into sentences rules (e.g. srx)
  • Automate submitting translation suggestions to translate.google.com [Kross action]

Not for KDE:

  • Be a complete computer-aided translation system by providing e.g. actions to import+export openoffice, txt and documents of other formats by calling appropriate scripts/commands. Define a general kross actions interface for that.
  • Make nice windoze package for the windowzerz

KBabel features to be implemented

...in the smarter way :). After or during the summer.

  • Character selection tool integration (kdelibs rule); sort by the frequency
  • persistent bookmarks for messages in a file saved in the project
  • extended marking of .po and .pot files (e.g. translator that currently works on the file and since when) saved in the project
  • Search/Replace functions in multiple files at once.
  • Spellchecking of multiple files at once.
  • Opening source code by references in message comments [Kross action]
  • A plugin framework for validation tools for consistency checks [Kross action triggered on saving]
  • Sending the file using email [Kross (project) action]
  • Automatic syntax check with msgfmt when saving and, if an error occurred, easy navigation to the messages that contain errors. == Syntax check (msgfmt --statistics) for existing files to control if the translated files will compile and, accordingly, work when distributed [Kross (project) action]
  • CVS and SVN support [Kross project action] (is 'svn ci' so hard?)
  • Automatic comparisons and statistics of POT and PO files for a quick overview which and how many files are translated (or not) and which files may be obsolete + [Kross (project) action] that merges translations with updated template
  • PO File Header change [Kross action (+triggered on saving)]
  • Printing of selected messages (eg fuzzy ) [Kross action]

KBabel features NOT to be implemented

  • Automatic ("rough" in kbabel terms) translation. Pure machine translation is a joke. All machine-made translations must be verified by a human (and let it be a translator rather than the user).

Setup

  • Create project, saving *.ktp file to l10n-kde4/<LangCode>
  • In the same dir, create a file called terms.tbx and fill it with initial contents:

Glossary

This is what a TBX glossary looks like:

<?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE martif PUBLIC "ISO 12200:1999A//DTD MARTIF core (DXFcdV04)//EN" "TBXcdv04.dtd">

<martif type="TBX" xml:lang="en">

   <martifHeader>
       <fileDesc>
           <titleStmt>
               <title>KDE Russian Team Glossary</title>
           </titleStmt>
       </fileDesc>
   </martifHeader>
   <text>
       <body>
           <termEntry id="1">
               <descrip type="subjectField">Security</descrip>
               <descrip type="relatedConcept">authorization</descrip>
               <langSet xml:lang="en">
                   <ntig>
                       <termGrp>
                           <term>authentication</term>
                           <termNote type="partOfSpeech">noun</termNote>
                       </termGrp>
                       <descrip type="context">.</descrip>
                   </ntig>
               </langSet>
               <langSet xml:lang="ru">
                   <ntig>
                       <termGrp>
                           <term>идентификация</term>
                           <termNote type="partOfSpeech">сущ.</termNote>
                       </termGrp>
                   </ntig>
               </langSet>
               <langSet xml:lang="ru">
                   <ntig>
                       <termGrp>
                           <term>подтверждение подлинности</term>
                           <termNote type="partOfSpeech">сущ.</termNote>
                       </termGrp>
                   </ntig>
               </langSet>
           </termEntry>
           <termEntry id="2">
               <descrip type="subjectField">Security</descrip>
               <descrip type="relatedConcept">authentication</descrip>
               <langSet xml:lang="en">
                   <ntig>
                       <termGrp>
                           <term>authorization</term>
                           <termNote type="partOfSpeech">noun</termNote>
                       </termGrp>
                       <descrip type="context"></descrip>
                   </ntig>
               </langSet>
               <langSet xml:lang="ru">
                   <ntig>
                       <termGrp>
                           <term>определение прав</term>
                           <termNote type="partOfSpeech">сущ.</termNote>
                       </termGrp>
                   </ntig>
               </langSet>
           </termEntry>
       </body>
   </text>

</martif>