Add package identify to get language identification from Google

This commit is contained in:
Kristóf Tóth 2019-10-12 00:17:48 +02:00
parent 80eb2de872
commit 4aa5a8b0a9
2 changed files with 133 additions and 0 deletions

47
identify/id.go Normal file
View File

@ -0,0 +1,47 @@
package identify
import (
"os/exec"
"regexp"
)
// Indetification pairs a word with the function used to identify its language.
//
// NOTE: the name is a misspelling of "Identification"; it is exported and
// referenced under this spelling by the package's tests.
type Indetification struct {
// word is the text whose language is to be identified.
word string
// Identifier produces the raw identification output for an Indetification.
// New sets it to a function that runs the external `trans` command; it may be
// replaced (for example with a stub in tests). Identify parses its output.
Identifier func(*Indetification) (string, error)
}
// New returns an Indetification for word whose Identifier is the default
// implementation, executeTransShell.
func New(word string) Indetification {
	var id Indetification
	id.word = word
	id.Identifier = executeTransShell
	return id
}
// executeTransShell runs `trans -no-ansi -id <word>` for the word held by i and
// returns whatever the command wrote to standard output, together with the
// error reported by the command (if any).
//
// NOTE(review): the word is passed as a plain argument; a word starting with
// "-" may be read by `trans` as an option — confirm whether that matters.
func executeTransShell(i *Indetification) (string, error) {
	cmd := exec.Command("trans", "-no-ansi", "-id", i.word)
	stdout, err := cmd.Output()
	return string(stdout), err
}
// Identify calls the configured Identifier and returns the language code
// parsed from its output. The result is the empty string when the output
// contains no recognizable code line.
//
// Identify panics if the Identifier returns an error.
func (i Indetification) Identify() string {
	raw, identifyErr := i.Identifier(&i)
	if identifyErr == nil {
		return parseTransIdentifyOutput(raw)
	}
	panic(identifyErr)
}
// codeLineRe matches the "Code" row of `trans -id` output and captures the
// language code that follows it. It is compiled once at package scope instead
// of on every call to parseTransIdentifyOutput.
var codeLineRe = regexp.MustCompile(`(?m)^Code\s+(\w+)$`)

// parseTransIdentifyOutput extracts the language code from the text printed by
// `trans -id`.
//
// It returns the word captured by the first match of a line of the form
// "Code <word>". If out contains no such line, the empty string is returned.
func parseTransIdentifyOutput(out string) string {
	// FindStringSubmatch returns nil when nothing matches; otherwise index 1
	// holds the single capture group.
	if m := codeLineRe.FindStringSubmatch(out); m != nil {
		return m[1]
	}
	return ""
}

86
identify/id_test.go Normal file
View File

@ -0,0 +1,86 @@
package identify_test
import (
"testing"
"."
"os"
)
// testData describes a single TestIdentify case.
type testData struct {
// word is the input handed to identify.New.
word string
// identifyOutput is the canned output returned by the stub Identifier that
// TestIdentify installs when NOMOCK is not set.
identifyOutput string
// expectedCode is the language code Identify is expected to return.
expectedCode string
}
// TestIdentify checks that Identify returns the expected language code for a
// handful of words.
//
// By default each Indetification's Identifier is replaced with a stub that
// returns canned output, so the external `trans` command is not needed.
// Setting the NOMOCK environment variable (to any value, even empty) keeps the
// Identifier installed by identify.New.
func TestIdentify(t *testing.T) {
	// Canned identifier outputs, shared by the cases of the same language.
	const hungarianOutput = `Magyar
Name Hungarian
Family Uralic
Writing system Latin
Code hu
ISO 639-3 hun
SIL http://www-01.sil.org/iso639-3/documentation.asp?id=hun
Glottolog http://glottolog.org/resource/languoid/id/hung1274
Wikipedia http://en.wikipedia.org/wiki/Hungarian_language
`
	const englishOutput = `English
Name English
Family Indo-European
Writing system Latin
Code en
ISO 639-3 eng
SIL http://www-01.sil.org/iso639-3/documentation.asp?id=eng
Glottolog http://glottolog.org/resource/languoid/id/stan1293
Wikipedia http://en.wikipedia.org/wiki/English_language
`

	cases := []testData{
		{word: "macska", identifyOutput: hungarianOutput, expectedCode: "hu"},
		{word: "cat", identifyOutput: englishOutput, expectedCode: "en"},
		{word: "szofisztikált", identifyOutput: hungarianOutput, expectedCode: "hu"},
		{word: "distribute", identifyOutput: englishOutput, expectedCode: "en"},
	}

	// The environment does not change during the test, so look it up once.
	_, useRealIdentifier := os.LookupEnv("NOMOCK")

	for _, tc := range cases {
		id := identify.New(tc.word)
		if !useRealIdentifier {
			id.Identifier = func(*identify.Indetification) (string, error) {
				return tc.identifyOutput, nil
			}
		}
		// Report the value actually returned so a failure is diagnosable
		// without re-running the test.
		if got := id.Identify(); got != tc.expectedCode {
			t.Errorf("Word '%s' should identify to '%s', but got '%s'!", tc.word, tc.expectedCode, got)
		}
	}
}