AipNlp.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380
  1. <?php
  2. /*
  3. * Copyright (c) 2017 Baidu.com, Inc. All Rights Reserved
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License"); you may not
  6. * use this file except in compliance with the License. You may obtain a copy of
  7. * the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  13. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  14. * License for the specific language governing permissions and limitations under
  15. * the License.
  16. */
  17. require_once 'lib/AipBase.php';
  /**
   * Client for the Baidu AIP natural-language-processing endpoints.
   *
   * Every public method follows the same recipe: put the required fields into
   * an associative array, merge the caller's $options on top, JSON-encode the
   * array, convert the JSON text from UTF-8 to GBK and pass it to
   * $this->request() (not defined in this class; presumably inherited from
   * AipBase).
   *
   * Encoding note: the upstream parameter docs describe the service as
   * GBK-only, but json_encode() requires UTF-8 input, and the UTF-8 -> GBK
   * conversion is done here on the encoded request body. Callers should
   * therefore pass UTF-8 strings.
   */
  class AipNlp extends AipBase
  {
      /**
       * Lexical analysis (lexer) API URL.
       * @var string
       */
      private $lexerUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/lexer';

      /**
       * Lexical analysis, customized edition (lexer_custom) API URL.
       * @var string
       */
      private $lexerCustomUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/lexer_custom';

      /**
       * Dependency parsing (depparser) API URL.
       * @var string
       */
      private $depParserUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/depparser';

      /**
       * Word embedding (word_emb_vec) API URL.
       * @var string
       */
      private $wordEmbeddingUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/word_emb_vec';

      /**
       * DNN language model (dnnlm_cn) API URL.
       * @var string
       */
      private $dnnlmCnUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/dnnlm_cn';

      /**
       * Word semantic similarity (word_emb_sim) API URL.
       * @var string
       */
      private $wordSimEmbeddingUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/word_emb_sim';

      /**
       * Short-text similarity (simnet) API URL.
       * @var string
       */
      private $simnetUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/simnet';

      /**
       * Comment opinion extraction (comment_tag) API URL.
       * @var string
       */
      private $commentTagUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v2/comment_tag';

      /**
       * Sentiment classification (sentiment_classify) API URL.
       * @var string
       */
      private $sentimentClassifyUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/sentiment_classify';

      /**
       * Article tagging (keyword) API URL.
       * @var string
       */
      private $keywordUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/keyword';

      /**
       * Article classification (topic) API URL.
       * @var string
       */
      private $topicUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/topic';

      /**
       * Text error correction (ecnet) API URL.
       * @var string
       */
      private $ecnetUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/ecnet';

      /**
       * Conversational emotion recognition (emotion) API URL.
       * @var string
       */
      private $emotionUrl = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/emotion';

      /**
       * Decode a raw response body.
       *
       * The body is converted from GBK to UTF-8 and then parsed as JSON into
       * associative arrays (depth limit 512). Integers too large for a PHP int
       * are returned as strings rather than floats.
       *
       * NOTE(review): the misspelled name is kept on purpose; it looks like an
       * override of a hook declared in AipBase - confirm before renaming.
       *
       * @param string $content Raw response body, expected to be GBK-encoded JSON.
       * @return mixed Decoded value, or null when the text is not valid JSON.
       */
      protected function proccessResult($content)
      {
          $utf8Json = mb_convert_encoding($content, 'UTF8', 'GBK');

          return json_decode($utf8Json, true, 512, JSON_BIGINT_AS_STRING);
      }

      /**
       * Build the request body shared by all endpoints and send it.
       *
       * $options is merged over $fields with array_merge(), so an option that
       * reuses a required key replaces that field. The merged array is
       * JSON-encoded and the resulting text converted from UTF-8 to GBK before
       * being handed to $this->request().
       *
       * NOTE(review): json_encode() returns false on failure (for example when a
       * string is not valid UTF-8); that result is not checked here, which
       * matches the behaviour of the original per-method code.
       *
       * @param string $url     Endpoint URL.
       * @param array  $fields  Required request fields for the endpoint.
       * @param array  $options Caller-supplied optional fields.
       * @return mixed Whatever $this->request() returns.
       */
      private function sendGbkJson($url, array $fields, $options)
      {
          $payload = array_merge($fields, $options);
          $body = mb_convert_encoding(json_encode($payload), 'GBK', 'UTF8');

          return $this->request($url, $body);
      }

      /**
       * Lexical analysis.
       *
       * @param string $text    Text to analyze, at most 65536 bytes. Pass UTF-8;
       *                        the request body is converted to GBK internally.
       * @param array  $options Optional request fields (string keys and values).
       *                        No options are documented for this endpoint.
       * @return array Result of $this->request() for the lexer endpoint.
       */
      public function lexer($text, $options=array())
      {
          return $this->sendGbkJson($this->lexerUrl, array('text' => $text), $options);
      }

      /**
       * Lexical analysis, customized edition.
       *
       * @param string $text    Text to analyze, at most 65536 bytes. Pass UTF-8;
       *                        the request body is converted to GBK internally.
       * @param array  $options Optional request fields (string keys and values).
       *                        No options are documented for this endpoint.
       * @return array Result of $this->request() for the lexer_custom endpoint.
       */
      public function lexerCustom($text, $options=array())
      {
          return $this->sendGbkJson($this->lexerCustomUrl, array('text' => $text), $options);
      }

      /**
       * Dependency parsing.
       *
       * @param string $text    Text to analyze, at most 256 bytes. Pass UTF-8;
       *                        the request body is converted to GBK internally.
       * @param array  $options Optional request fields (string keys and values):
       *                        - mode: model selection. Default 0. mode=0 selects
       *                          the web model, mode=1 the query model.
       * @return array Result of $this->request() for the depparser endpoint.
       */
      public function depParser($text, $options=array())
      {
          return $this->sendGbkJson($this->depParserUrl, array('text' => $text), $options);
      }

      /**
       * Word embedding (vector representation of a word).
       *
       * @param string $word    The word, at most 64 bytes. Pass UTF-8; the
       *                        request body is converted to GBK internally.
       * @param array  $options Optional request fields (string keys and values).
       *                        No options are documented for this endpoint.
       * @return array Result of $this->request() for the word_emb_vec endpoint.
       */
      public function wordEmbedding($word, $options=array())
      {
          return $this->sendGbkJson($this->wordEmbeddingUrl, array('word' => $word), $options);
      }

      /**
       * DNN language model.
       *
       * @param string $text    Text content, at most 512 bytes; no word
       *                        segmentation is needed. Pass UTF-8; the request
       *                        body is converted to GBK internally.
       * @param array  $options Optional request fields (string keys and values).
       *                        No options are documented for this endpoint.
       * @return array Result of $this->request() for the dnnlm_cn endpoint.
       */
      public function dnnlm($text, $options=array())
      {
          return $this->sendGbkJson($this->dnnlmCnUrl, array('text' => $text), $options);
      }

      /**
       * Semantic similarity between two words.
       *
       * @param string $word1   First word, at most 64 bytes (sent as "word_1").
       * @param string $word2   Second word, at most 64 bytes (sent as "word_2").
       *                        Pass both as UTF-8; the request body is converted
       *                        to GBK internally.
       * @param array  $options Optional request fields (string keys and values):
       *                        - mode: reserved for choosing between similarity
       *                          models. Default 0; only mode=0 is supported.
       * @return array Result of $this->request() for the word_emb_sim endpoint.
       */
      public function wordSimEmbedding($word1, $word2, $options=array())
      {
          $fields = array(
              'word_1' => $word1,
              'word_2' => $word2,
          );

          return $this->sendGbkJson($this->wordSimEmbeddingUrl, $fields, $options);
      }

      /**
       * Similarity between two short texts.
       *
       * @param string $text1   First text, at most 512 bytes (sent as "text_1").
       * @param string $text2   Second text, at most 512 bytes (sent as "text_2").
       *                        Pass both as UTF-8; the request body is converted
       *                        to GBK internally.
       * @param array  $options Optional request fields (string keys and values):
       *                        - model: defaults to "BOW"; one of "BOW", "CNN",
       *                          "GRNN".
       * @return array Result of $this->request() for the simnet endpoint.
       */
      public function simnet($text1, $text2, $options=array())
      {
          $fields = array(
              'text_1' => $text1,
              'text_2' => $text2,
          );

          return $this->sendGbkJson($this->simnetUrl, $fields, $options);
      }

      /**
       * Comment opinion extraction.
       *
       * @param string $text    Comment text, at most 10240 bytes. Pass UTF-8; the
       *                        request body is converted to GBK internally.
       * @param array  $options Optional request fields (string keys and values):
       *                        - type: industry category of the comment. Default
       *                          4 (food and dining).
       * @return array Result of $this->request() for the comment_tag endpoint.
       */
      public function commentTag($text, $options=array())
      {
          return $this->sendGbkJson($this->commentTagUrl, array('text' => $text), $options);
      }

      /**
       * Sentiment classification.
       *
       * @param string $text    Text content, at most 102400 bytes. Pass UTF-8;
       *                        the request body is converted to GBK internally.
       * @param array  $options Optional request fields (string keys and values).
       *                        No options are documented for this endpoint.
       * @return array Result of $this->request() for the sentiment_classify
       *               endpoint.
       */
      public function sentimentClassify($text, $options=array())
      {
          return $this->sendGbkJson($this->sentimentClassifyUrl, array('text' => $text), $options);
      }

      /**
       * Article tagging (keyword extraction).
       *
       * @param string $title   Article title, at most 80 bytes.
       * @param string $content Article body, at most 65535 bytes.
       * @param array  $options Optional request fields (string keys and values).
       *                        No options are documented for this endpoint.
       * @return array Result of $this->request() for the keyword endpoint.
       */
      public function keyword($title, $content, $options=array())
      {
          $fields = array(
              'title' => $title,
              'content' => $content,
          );

          return $this->sendGbkJson($this->keywordUrl, $fields, $options);
      }

      /**
       * Article classification.
       *
       * @param string $title   Article title, at most 80 bytes.
       * @param string $content Article body, at most 65535 bytes.
       * @param array  $options Optional request fields (string keys and values).
       *                        No options are documented for this endpoint.
       * @return array Result of $this->request() for the topic endpoint.
       */
      public function topic($title, $content, $options=array())
      {
          $fields = array(
              'title' => $title,
              'content' => $content,
          );

          return $this->sendGbkJson($this->topicUrl, $fields, $options);
      }

      /**
       * Text error correction.
       *
       * @param string $text    Text to correct, at most 511 bytes.
       * @param array  $options Optional request fields (string keys and values).
       *                        No options are documented for this endpoint.
       * @return array Result of $this->request() for the ecnet endpoint.
       */
      public function ecnet($text, $options=array())
      {
          return $this->sendGbkJson($this->ecnetUrl, array('text' => $text), $options);
      }

      /**
       * Conversational emotion recognition.
       *
       * @param string $text    Text whose emotion is to be recognized, at most
       *                        512 bytes.
       * @param array  $options Optional request fields (string keys and values):
       *                        - scene: "default" (no scene distinction), "talk"
       *                          (casual chat, e.g. a chat assistant), "task"
       *                          (task-oriented dialog, e.g. navigation),
       *                          "customer_service" (customer-service dialog,
       *                          e.g. telecom or banking).
       * @return array Result of $this->request() for the emotion endpoint.
       */
      public function emotion($text, $options=array())
      {
          return $this->sendGbkJson($this->emotionUrl, array('text' => $text), $options);
      }
  }