Add package identify to get language identification from Google
This commit is contained in:
		
							
								
								
									
										47
									
								
								identify/id.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								identify/id.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,47 @@
 | 
			
		||||
package identify
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"os/exec"
 | 
			
		||||
	"regexp"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// Indetification pairs a word with the function used to obtain the raw
// language-identification output for it.
type Indetification struct {
	// word is the text whose language is to be identified.
	word string

	// Identifier produces the raw identification output for the value it is
	// given. Because the field is exported it can be swapped out, for example
	// to return canned output instead of running an external command.
	Identifier func(*Indetification) (string, error)
}
 | 
			
		||||
 | 
			
		||||
func New(word string) Indetification {
 | 
			
		||||
	return Indetification{
 | 
			
		||||
		word: word,
 | 
			
		||||
		Identifier: executeTransShell,
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func executeTransShell(i *Indetification) (string, error) {
 | 
			
		||||
	config := []string{
 | 
			
		||||
		"-no-ansi",
 | 
			
		||||
		"-id",
 | 
			
		||||
		i.word,
 | 
			
		||||
	}
 | 
			
		||||
	outBytes, err := exec.Command("trans", config...).Output()
 | 
			
		||||
	return string(outBytes), err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (i Indetification) Identify() string {
 | 
			
		||||
	output, err := i.Identifier(&i)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		panic(err)
 | 
			
		||||
	}
 | 
			
		||||
	return parseTransIdentifyOutput(output)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// transCodeLine matches the "Code" row of an identification table and
// captures the language code in its first group.
//
//   - It is compiled once at package initialisation instead of on every call.
//   - The separator is restricted to spaces and tabs so the match cannot run
//     across a line break.
//   - The code may contain hyphens (e.g. "zh-CN").
//   - Trailing blanks or a carriage return before the end of the line are
//     tolerated.
var transCodeLine = regexp.MustCompile(`(?m)^Code[ \t]+([\w-]+)[ \t\r]*$`)

// parseTransIdentifyOutput extracts the language code from identification
// output, i.e. the value on the line that starts with "Code".
//
// It returns the code from the first such line, or the empty string when no
// line matches.
func parseTransIdentifyOutput(out string) string {
	m := transCodeLine.FindStringSubmatch(out)
	if m == nil {
		return ""
	}
	return m[1]
}
 | 
			
		||||
							
								
								
									
										86
									
								
								identify/id_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										86
									
								
								identify/id_test.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,86 @@
 | 
			
		||||
package identify_test
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"testing"
 | 
			
		||||
	"."
 | 
			
		||||
	"os"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// testData describes a single table-driven case for TestIdentify.
type testData struct {
	// word is the input handed to identify.New.
	word string

	// identifyOutput is the canned output the mocked Identifier returns.
	identifyOutput string

	// expectedCode is the value Identify is expected to return for word.
	expectedCode string
}
 | 
			
		||||
 | 
			
		||||
func TestIdentify(t *testing.T) {
 | 
			
		||||
	cases := []testData{
 | 
			
		||||
		testData{
 | 
			
		||||
"macska",
 | 
			
		||||
`Magyar
 | 
			
		||||
Name                  Hungarian
 | 
			
		||||
Family                Uralic
 | 
			
		||||
Writing system        Latin
 | 
			
		||||
Code                  hu
 | 
			
		||||
ISO 639-3             hun
 | 
			
		||||
SIL                   http://www-01.sil.org/iso639-3/documentation.asp?id=hun
 | 
			
		||||
Glottolog             http://glottolog.org/resource/languoid/id/hung1274
 | 
			
		||||
Wikipedia             http://en.wikipedia.org/wiki/Hungarian_language
 | 
			
		||||
`,
 | 
			
		||||
"hu",
 | 
			
		||||
		},
 | 
			
		||||
		testData{
 | 
			
		||||
"cat",
 | 
			
		||||
`English
 | 
			
		||||
Name                  English
 | 
			
		||||
Family                Indo-European
 | 
			
		||||
Writing system        Latin
 | 
			
		||||
Code                  en
 | 
			
		||||
ISO 639-3             eng
 | 
			
		||||
SIL                   http://www-01.sil.org/iso639-3/documentation.asp?id=eng
 | 
			
		||||
Glottolog             http://glottolog.org/resource/languoid/id/stan1293
 | 
			
		||||
Wikipedia             http://en.wikipedia.org/wiki/English_language
 | 
			
		||||
`,
 | 
			
		||||
"en",
 | 
			
		||||
		},
 | 
			
		||||
		testData{
 | 
			
		||||
"szofisztikált",
 | 
			
		||||
`Magyar
 | 
			
		||||
Name                  Hungarian
 | 
			
		||||
Family                Uralic
 | 
			
		||||
Writing system        Latin
 | 
			
		||||
Code                  hu
 | 
			
		||||
ISO 639-3             hun
 | 
			
		||||
SIL                   http://www-01.sil.org/iso639-3/documentation.asp?id=hun
 | 
			
		||||
Glottolog             http://glottolog.org/resource/languoid/id/hung1274
 | 
			
		||||
Wikipedia             http://en.wikipedia.org/wiki/Hungarian_language
 | 
			
		||||
`,
 | 
			
		||||
"hu",
 | 
			
		||||
		},
 | 
			
		||||
		testData{
 | 
			
		||||
"distribute",
 | 
			
		||||
`English
 | 
			
		||||
Name                  English
 | 
			
		||||
Family                Indo-European
 | 
			
		||||
Writing system        Latin
 | 
			
		||||
Code                  en
 | 
			
		||||
ISO 639-3             eng
 | 
			
		||||
SIL                   http://www-01.sil.org/iso639-3/documentation.asp?id=eng
 | 
			
		||||
Glottolog             http://glottolog.org/resource/languoid/id/stan1293
 | 
			
		||||
Wikipedia             http://en.wikipedia.org/wiki/English_language
 | 
			
		||||
`,
 | 
			
		||||
"en",
 | 
			
		||||
		},
 | 
			
		||||
	}
 | 
			
		||||
	for _, data := range cases {
 | 
			
		||||
		id := identify.New(data.word)
 | 
			
		||||
		if _, ok := os.LookupEnv("NOMOCK"); !ok {
 | 
			
		||||
			id.Identifier = func(i *identify.Indetification) (string, error) {
 | 
			
		||||
				return data.identifyOutput, nil
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		if id.Identify() != data.expectedCode {
 | 
			
		||||
			t.Errorf("Word '%s' should identify to '%s'!", data.word, data.expectedCode)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user