library(spacyr)
# Example corpus: a named character vector holding two short documents.
# The element names (doc1, doc2) show up as the doc_id values in the
# parsed output printed further down.
txt <- c(
  doc1 = "Natural Language Processing is a branch of computer science that employs various Artificial Intelligence (AI) techniques to process content written in natural language. NLP-enhanced wikis can support users in finding, developing and organizing knowledge contained inside the wiki repository. ",
  doc2 = "Paul earned a postgraduate degree from MIT."
)
# Tokenise and annotate both documents. With nounphrase = TRUE the result
# carries a nounphrase column (beg / mid / end_root / beg_root markers),
# as shown in the printed output below. The bare `tmp` on the last line
# auto-prints the parsed table at top level.
tmp <- spacy_parse(txt, nounphrase = TRUE)
tmp
## Found 'spacy_condaenv'. spacyr will use this environment
## successfully initialized (spaCy Version: 2.0.16, language model: en)
## (python options: type = "condaenv", value = "spacy_condaenv")
##    doc_id sentence_id token_id        token        lemma   pos entity
## 1    doc1           1        1      Natural      natural PROPN  ORG_B
## 2    doc1           1        2     Language     language PROPN  ORG_I
## 3    doc1           1        3   Processing   processing PROPN  ORG_I
## 4    doc1           1        4           is           be  VERB       
## 5    doc1           1        5            a            a   DET       
## 6    doc1           1        6       branch       branch  NOUN       
## 7    doc1           1        7           of           of   ADP       
## 8    doc1           1        8     computer     computer  NOUN       
## 9    doc1           1        9      science      science  NOUN       
## 10   doc1           1       10         that         that   ADJ       
## 11   doc1           1       11      employs       employ  VERB       
## 12   doc1           1       12      various      various   ADJ       
## 13   doc1           1       13   Artificial   artificial PROPN  ORG_B
## 14   doc1           1       14 Intelligence intelligence PROPN  ORG_I
## 15   doc1           1       15            (            ( PUNCT  ORG_I
## 16   doc1           1       16           AI           ai PROPN  ORG_I
## 17   doc1           1       17            )            ) PUNCT       
## 18   doc1           1       18   techniques    technique  NOUN       
## 19   doc1           1       19           to           to  PART       
## 20   doc1           1       20      process      process  VERB       
## 21   doc1           1       21      content      content  NOUN       
## 22   doc1           1       22      written        write  VERB       
## 23   doc1           1       23           in           in   ADP       
## 24   doc1           1       24      natural      natural   ADJ       
## 25   doc1           1       25     language     language  NOUN       
## 26   doc1           1       26            .            . PUNCT       
## 27   doc1           2        1          NLP          nlp PROPN  ORG_B
## 28   doc1           2        2            -            - PUNCT       
## 29   doc1           2        3     enhanced      enhance  VERB       
## 30   doc1           2        4        wikis         wiki  NOUN       
## 31   doc1           2        5          can          can  VERB       
## 32   doc1           2        6      support      support  VERB       
## 33   doc1           2        7        users         user  NOUN       
## 34   doc1           2        8           in           in   ADP       
## 35   doc1           2        9      finding         find  VERB       
## 36   doc1           2       10            ,            , PUNCT       
## 37   doc1           2       11   developing      develop  VERB       
## 38   doc1           2       12          and          and CCONJ       
## 39   doc1           2       13   organizing     organize  VERB       
## 40   doc1           2       14    knowledge    knowledge  NOUN       
## 41   doc1           2       15    contained      contain  VERB       
## 42   doc1           2       16       inside       inside   ADP       
## 43   doc1           2       17          the          the   DET       
## 44   doc1           2       18         wiki         wiki  NOUN       
## 45   doc1           2       19   repository   repository  NOUN       
## 46   doc1           2       20            .            . PUNCT       
## 47   doc2           1        1         Paul         paul PROPN  ORG_B
## 48   doc2           1        2       earned         earn  VERB       
## 49   doc2           1        3            a            a   DET       
## 50   doc2           1        4 postgraduate postgraduate  NOUN       
## 51   doc2           1        5       degree       degree  NOUN       
## 52   doc2           1        6         from         from   ADP       
## 53   doc2           1        7          MIT          mit PROPN  ORG_B
## 54   doc2           1        8            .            . PUNCT       
##    nounphrase whitespace
## 1         beg       TRUE
## 2         mid       TRUE
## 3    end_root       TRUE
## 4                   TRUE
## 5         beg       TRUE
## 6    end_root       TRUE
## 7                   TRUE
## 8         beg       TRUE
## 9    end_root       TRUE
## 10                  TRUE
## 11                  TRUE
## 12        beg       TRUE
## 13        mid       TRUE
## 14        mid       TRUE
## 15        mid      FALSE
## 16        mid      FALSE
## 17        mid       TRUE
## 18   end_root       TRUE
## 19                  TRUE
## 20                  TRUE
## 21   beg_root       TRUE
## 22                  TRUE
## 23                  TRUE
## 24        beg       TRUE
## 25   end_root      FALSE
## 26                  TRUE
## 27        beg      FALSE
## 28        mid      FALSE
## 29        mid       TRUE
## 30   end_root       TRUE
## 31                  TRUE
## 32                  TRUE
## 33   beg_root       TRUE
## 34                  TRUE
## 35                 FALSE
## 36                  TRUE
## 37                  TRUE
## 38                  TRUE
## 39                  TRUE
## 40   beg_root       TRUE
## 41                  TRUE
## 42                  TRUE
## 43        beg       TRUE
## 44        mid       TRUE
## 45   end_root      FALSE
## 46                  TRUE
## 47   beg_root       TRUE
## 48                  TRUE
## 49        beg       TRUE
## 50        mid       TRUE
## 51   end_root       TRUE
## 52                  TRUE
## 53   beg_root      FALSE
## 54                 FALSE
# List the noun phrases found in the parsed text: one row per phrase, with
# its doc_id, sentence_id and root token (see the output below).
spacyr::nounphrase_extract(tmp)
##    doc_id sentence_id                                      nounphrase
## 1    doc1           1                     Natural Language Processing
## 2    doc1           1                                        a branch
## 3    doc1           1                                computer science
## 4    doc1           1 various Artificial Intelligence (AI) techniques
## 5    doc1           1                                         content
## 6    doc1           1                                natural language
## 7    doc1           2                              NLP-enhanced wikis
## 8    doc1           2                                           users
## 9    doc1           2                                       knowledge
## 10   doc1           2                             the wiki repository
## 11   doc2           1                                            Paul
## 12   doc2           1                           a postgraduate degree
## 13   doc2           1                                             MIT
##    root_token
## 1  Processing
## 2      branch
## 3     science
## 4  techniques
## 5     content
## 6    language
## 7       wikis
## 8       users
## 9   knowledge
## 10 repository
## 11       Paul
## 12     degree
## 13        MIT
# Collapse every multi-token noun phrase into a single token joined by "_".
# In the output below the merged rows get pos = "nounphrase" and token_id
# is renumbered within each sentence.
spacyr::nounphrase_consolidate(tmp)
##    doc_id sentence_id token_id
## 1    doc1           1        1
## 2    doc1           1        2
## 3    doc1           1        3
## 4    doc1           1        4
## 5    doc1           1        5
## 6    doc1           1        6
## 7    doc1           1        7
## 8    doc1           1        8
## 9    doc1           1        9
## 10   doc1           1       10
## 11   doc1           1       11
## 12   doc1           1       12
## 13   doc1           1       13
## 14   doc1           1       14
## 15   doc1           1       15
## 16   doc1           2        1
## 17   doc1           2        2
## 18   doc1           2        3
## 19   doc1           2        4
## 20   doc1           2        5
## 21   doc1           2        6
## 22   doc1           2        7
## 23   doc1           2        8
## 24   doc1           2        9
## 25   doc1           2       10
## 26   doc1           2       11
## 27   doc1           2       12
## 28   doc1           2       13
## 29   doc1           2       14
## 30   doc1           2       15
## 31   doc2           1        1
## 32   doc2           1        2
## 33   doc2           1        3
## 34   doc2           1        4
## 35   doc2           1        5
## 36   doc2           1        6
##                                              token
## 1                      Natural_Language_Processing
## 2                                               is
## 3                                         a_branch
## 4                                               of
## 5                                 computer_science
## 6                                             that
## 7                                          employs
## 8  various_Artificial_Intelligence_(AI)_techniques
## 9                                               to
## 10                                         process
## 11                                         content
## 12                                         written
## 13                                              in
## 14                                natural_language
## 15                                               .
## 16                              NLP-enhanced_wikis
## 17                                             can
## 18                                         support
## 19                                           users
## 20                                              in
## 21                                         finding
## 22                                               ,
## 23                                      developing
## 24                                             and
## 25                                      organizing
## 26                                       knowledge
## 27                                       contained
## 28                                          inside
## 29                             the_wiki_repository
## 30                                               .
## 31                                            Paul
## 32                                          earned
## 33                           a_postgraduate_degree
## 34                                            from
## 35                                             MIT
## 36                                               .
##                                             lemma        pos entity
## 1                     natural_language_processing nounphrase  ORG_B
## 2                                              be       VERB       
## 3                                        a_branch nounphrase       
## 4                                              of        ADP       
## 5                                computer_science nounphrase       
## 6                                            that        ADJ       
## 7                                          employ       VERB       
## 8  various_artificial_intelligence_(ai)_technique nounphrase       
## 9                                              to       PART       
## 10                                        process       VERB       
## 11                                        content nounphrase       
## 12                                          write       VERB       
## 13                                             in        ADP       
## 14                               natural_language nounphrase       
## 15                                              .      PUNCT       
## 16                               nlp-enhance_wiki nounphrase  ORG_B
## 17                                            can       VERB       
## 18                                        support       VERB       
## 19                                           user nounphrase       
## 20                                             in        ADP       
## 21                                           find       VERB       
## 22                                              ,      PUNCT       
## 23                                        develop       VERB       
## 24                                            and      CCONJ       
## 25                                       organize       VERB       
## 26                                      knowledge nounphrase       
## 27                                        contain       VERB       
## 28                                         inside        ADP       
## 29                            the_wiki_repository nounphrase       
## 30                                              .      PUNCT       
## 31                                           paul nounphrase  ORG_B
## 32                                           earn       VERB       
## 33                          a_postgraduate_degree nounphrase       
## 34                                           from        ADP       
## 35                                            mit nounphrase  ORG_B
## 36                                              .      PUNCT