# Recipe metadata for the Tesseract language data port.
# $portName and $portVersion are not defined in this file; they are expected to
# be supplied by whatever evaluates the recipe.
SUMMARY="Language data files for Tesseract OCR engine"
DESCRIPTION="Tesseract OCR can be fully trained to recognize new languages and scripts. \
A set of files for, community made, trained languages are available as \
separate packages per language."
HOMEPAGE="https://github.com/tesseract-ocr/"
LICENSE="Apache v2"
# One copyright holder per line.
COPYRIGHT="1985-1995 HP labs
	2012 Google Inc."
REVISION="4"
SOURCE_URI="https://github.com/tesseract-ocr/tessdata/archive/$portVersion.tar.gz"
CHECKSUM_SHA256="5dcb37198336b6953843b461ee535df1401b41008d550fc9e43d0edabca7adb1"
SOURCE_DIR="tessdata-$portVersion"
DISABLE_SOURCE_PACKAGE=yes

ARCHITECTURES="any"

PROVIDES="
	$portName = $portVersion
	"
BUILD_REQUIRES="
	"

# Maps each data-file prefix (the key) to a human-readable, single-line
# description (the value). The keys double as sub-package suffixes and as
# file-name prefixes, so they must remain valid shell identifier fragments.
declare -A languages

# Special data files
languages[osd]="orientation and script detection"
languages[equ]="math / equation detection"

# languages data files
languages[afr]="Afrikaans"
languages[amh]="Amharic"
languages[ara]="Arabic"
languages[asm]="Assamese"
languages[aze]="Azerbaijani"
languages[aze_cyrl]="Azerbaijani - Cyrillic"
languages[bel]="Belarusian"
languages[ben]="Bengali"
languages[bod]="Tibetan"
languages[bos]="Bosnian"
languages[bul]="Bulgarian"
languages[cat]="Catalan; Valencian"
languages[ceb]="Cebuano"
languages[ces]="Czech"
languages[chi_sim]="Chinese - Simplified"
languages[chi_tra]="Chinese - Traditional"
languages[chr]="Cherokee"
languages[cym]="Welsh"
languages[dan]="Danish"
languages[dan_frak]="Danish - Fraktur script"
languages[deu]="German"
languages[deu_frak]="German - Fraktur script"
languages[dzo]="Dzongkha"
languages[ell]="Greek, Modern (1453-)"
languages[eng]="English"
languages[enm]="English, Middle (1100-1500)"
languages[epo]="Esperanto"
languages[est]="Estonian"
languages[eus]="Basque"
languages[fas]="Persian"
languages[fin]="Finnish"
languages[fra]="French"
languages[frk]="Frankish"
languages[frm]="French, Middle (ca. 1400-1600)"
languages[gle]="Irish"
languages[glg]="Galician"
languages[grc]="Greek, Ancient (-1453)"
languages[guj]="Gujarati"
languages[hat]="Haitian; Haitian Creole"
languages[heb]="Hebrew"
languages[hin]="Hindi"
languages[hrv]="Croatian"
languages[hun]="Hungarian"
languages[iku]="Inuktitut"
languages[ind]="Indonesian"
languages[isl]="Icelandic"
languages[ita]="Italian"
languages[ita_old]="Italian - Old"
languages[jav]="Javanese"
languages[jpn]="Japanese"
languages[kan]="Kannada"
languages[kat]="Georgian"
languages[kat_old]="Georgian - Old"
languages[kaz]="Kazakh"
languages[khm]="Central Khmer"
languages[kir]="Kirghiz; Kyrgyz"
languages[kor]="Korean"
languages[kur]="Kurdish"
languages[lao]="Lao"
languages[lat]="Latin"
languages[lav]="Latvian"
languages[lit]="Lithuanian"
languages[mal]="Malayalam"
languages[mar]="Marathi"
languages[mkd]="Macedonian"
languages[mlt]="Maltese"
languages[msa]="Malay"
languages[mya]="Burmese"
languages[nep]="Nepali"
languages[nld]="Dutch; Flemish"
languages[nor]="Norwegian"
languages[ori]="Oriya"
languages[pan]="Panjabi; Punjabi"
languages[pol]="Polish"
languages[por]="Portuguese"
languages[pus]="Pushto; Pashto"
languages[ron]="Romanian; Moldavian; Moldovan"
languages[rus]="Russian"
languages[san]="Sanskrit"
languages[sin]="Sinhala; Sinhalese"
languages[slk]="Slovak"
languages[slk_frak]="Slovak - Fraktur script"
languages[slv]="Slovenian"
languages[spa]="Spanish; Castilian"
languages[spa_old]="Spanish; Castilian - Old"
languages[sqi]="Albanian"
languages[srp]="Serbian"
languages[srp_latn]="Serbian - Latin"
languages[swa]="Swahili"
languages[swe]="Swedish"
languages[syr]="Syriac"
languages[tam]="Tamil"
languages[tel]="Telugu"
languages[tgk]="Tajik"
languages[tgl]="Tagalog"
languages[tha]="Thai"
languages[tir]="Tigrinya"
languages[tur]="Turkish"
languages[uig]="Uighur; Uyghur"
languages[ukr]="Ukrainian"
languages[urd]="Urdu"
languages[uzb]="Uzbek"
languages[uzb_cyrl]="Uzbek - Cyrillic"
languages[vie]="Vietnamese"
languages[yid]="Yiddish"

# Define the per-language sub-package attributes. For every key in the
# `languages` associative array this sets SUMMARY_<key>, PROVIDES_<key> and
# REQUIRES_<key> as global variables.
for lang in "${!languages[@]}"; do
	desc=${languages[${lang}]}
	# printf -v assigns to a computed variable name without re-parsing the
	# value as shell code, so a description containing quotes, `$` or
	# backticks cannot alter what gets executed.
	printf -v "SUMMARY_${lang}" '%s' "Data files for ${desc}"
	printf -v "PROVIDES_${lang}" '%s' " ${portName}_${lang} = ${portVersion} "
	# The "\n" sequences are intentionally literal backslash-n: they are
	# passed as data (not as the printf format) so they are stored verbatim,
	# which is what the previous eval-based assignment produced.
	# NOTE(review): presumably decoded into line breaks by the recipe
	# consumer -- confirm before replacing them with real newlines.
	printf -v "REQUIRES_${lang}" '%s' ' haiku\n vendor_tesseract >= 3\n '
done

# Copies the data files of every language in `languages` from the current
# directory into $dataDir/tessdata and registers the copied files with the
# matching sub-package through packageEntries (not defined in this file).
# Globals:   languages (read), dataDir (read)
# Arguments: none
INSTALL()
{
	# Keep the loop variable local so the caller's `lang` is not clobbered.
	local lang
	local targetDir="$dataDir/tessdata"

	mkdir -p "$targetDir"

	for lang in "${!languages[@]}"; do
		# Only the ".*" suffix is left unquoted so it still globs, while
		# the variable parts are protected from word splitting.
		cp "$lang".* "$targetDir"
		packageEntries "$lang" \
			"$targetDir/$lang".*
	done
}