使用外部API提取

<?php


/**
 * Retrieve only the body from the raw response.
 *
 * Values that cannot be indexed like an array (an error object, false, null,
 * a plain string, ...) yield an empty string instead of raising
 * "Cannot use object of type ... as array".
 *
 * @since 2.7.0
 *
 * @param array|WP_Error $response HTTP response.
 * @return string The body of the response. Empty string if no body or incorrect parameter given.
 */
function wp_remote_retrieve_body( $response ) {
    // Only arrays and ArrayAccess implementations may be read with ['body'];
    // isset() on any other object throws an Error.
    $indexable = is_array( $response ) || ( $response instanceof ArrayAccess );
    if ( ! $indexable ) {
        return '';
    }

    if ( ! isset( $response['body'] ) ) {
        return '';
    }

    return $response['body'];
}


/**
 * Send an HTTP POST request with cURL and return the raw response body.
 *
 * The payload is encoded with http_build_query(), i.e. it is sent as
 * application/x-www-form-urlencoded.
 *
 * @param  string       $url       Target URL.
 * @param  array        $post_data Fields to send.
 * @param  integer      $timeout   Timeout in seconds, used for both the connect phase and the
 *                                 whole transfer. Non-numeric or non-positive values fall back to 100.
 * @param  string|array $header    Extra request header line(s), e.g. "Accept: application/json".
 * @return string|false Response body, or false when the transfer failed.
 */
function curlPost($url, $post_data=array(), $timeout=100, $header="") {
    // Be tolerant of callers that hand over something unusable as a timeout:
    // a request without any timeout can block the script forever.
    $seconds = 100;
    if (is_numeric($timeout) && $timeout > 0) {
        $seconds = max(1, (int) ceil((float) $timeout));
    }

    // Accept one header line or a list of them; drop blanks so cURL is never
    // asked to send an empty header line.
    $headers = array();
    foreach ((array) $header as $line) {
        if (is_string($line) && trim($line) !== '') {
            $headers[] = trim($line);
        }
    }

    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($post_data));
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    // Keep certificate and host-name verification on; switching them off
    // exposes the request to man-in-the-middle interception.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);

    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $seconds);
    curl_setopt($ch, CURLOPT_TIMEOUT, $seconds);

    // HTTP_HOST is not defined when running from the command line.
    if (!empty($_SERVER['HTTP_HOST'])) {
        curl_setopt($ch, CURLOPT_REFERER, $_SERVER['HTTP_HOST']);
    }
    if (count($headers) > 0) {
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
    }

    $result = curl_exec($ch);
    if ($result === false) {
        // Report failures to the error log rather than echoing debug markup
        // into the page output.
        error_log(sprintf(
            'curlPost: request to %s failed (cURL error %d: %s)',
            $url,
            curl_errno($ch),
            curl_error($ch)
        ));
    }
    curl_close($ch);

    return $result;
}


/**
 * Reduce an HTML fragment to plain UTF-8 text.
 *
 * Script elements and all remaining tags are removed, whitespace following a
 * line break is collapsed, and a fixed set of HTML entities is decoded.
 * Decoded symbols are emitted as UTF-8 byte sequences so the result is a
 * consistently encoded string.
 *
 * @param string $ep HTML input.
 * @return string Plain text.
 */
function wp_aatags_html2text($ep) {
    // Pattern => replacement, applied in this order.
    $rules = array(
        '~<script[^>]*?>.*?</script>~si' => '',
        '~<[\/\!]*?[^<>]*?>~si'          => '',
        '~([\r\n])[\s]+~'                => '$1',
        '~&(quot|#34|#034|#x22);~i'      => '"',
        '~&(lt|#60|#060|#x3c);~i'        => '<',
        '~&(gt|#62|#062|#x3e);~i'        => '>',
        '~&(nbsp|#160|#xa0);~i'          => ' ',
        '~&(iexcl|#161);~i'              => "\xC2\xA1",     // inverted exclamation mark
        '~&(cent|#162);~i'               => "\xC2\xA2",     // cent sign
        '~&(pound|#163);~i'              => "\xC2\xA3",     // pound sign
        '~&(copy|#169);~i'               => "\xC2\xA9",     // copyright sign
        '~&(reg|#174);~i'                => "\xC2\xAE",     // registered sign
        '~&(deg|#176);~i'                => "\xC2\xB0",     // degree sign
        '~&(#39|#039|#x27);~'            => "'",
        '~&(euro|#8364);~i'              => "\xE2\x82\xAC", // euro sign
        '~&a(uml|UML);~'                 => "\xC3\xA4",     // a with diaeresis
        '~&o(uml|UML);~'                 => "\xC3\xB6",     // o with diaeresis
        '~&u(uml|UML);~'                 => "\xC3\xBC",     // u with diaeresis
        '~&A(uml|UML);~'                 => "\xC3\x84",     // A with diaeresis
        '~&O(uml|UML);~'                 => "\xC3\x96",     // O with diaeresis
        '~&U(uml|UML);~'                 => "\xC3\x9C",     // U with diaeresis
        '~&szlig;~i'                     => "\xC3\x9F",     // sharp s
        // Ampersand is decoded last so that an escaped entity such as
        // "&amp;lt;" becomes "&lt;" and is not decoded a second time.
        '~&(amp|#38|#038|#x26);~i'       => '&',
    );

    return preg_replace(array_keys($rules), array_values($rules), $ep);
}

/**
 * Turn raw tag text into a hyphen-separated string.
 *
 * Steps, in order: non-breaking spaces become plain spaces; "%20" and "+"
 * become hyphens; the punctuation listed in $special_chars is removed; all
 * digits are removed; runs of whitespace and hyphens collapse into a single
 * "-"; leading and trailing ",", "-" and "_" are trimmed.
 *
 * @param string $taglist Raw tag text.
 * @return string Sanitized tag text.
 */
function wp_aatags_sanitize($taglist) {
    // Characters removed outright: ASCII punctuation, CJK punctuation and the
    // full-width forms of "!", ",", ":" and "?".
    $special_chars = array(
        '?', '!', ',', ':', '.', ';', '[', ']', '/', '\\', '=', '<', '>',
        '\'', '"', '&', '$', '#', '*', '(', ')', '|', '~', '`', '{', '}', '%',
        '、', '。', '“', '”', '《', '》', '!', ',', ':', '?',
        chr(0),
    );

    // With the /u modifier preg_replace() returns null for input that is not
    // valid UTF-8; keep the text unchanged in that case.
    $spaced = preg_replace('#\x{00a0}#siu', ' ', $taglist);
    if ($spaced !== null) {
        $taglist = $spaced;
    }

    // Encoded spaces must be converted before "%" is stripped, otherwise
    // "%20" could never match.
    $taglist = str_replace(array('%20', '+'), '-', $taglist);
    $taglist = str_replace($special_chars, '', $taglist);
    $taglist = preg_replace('/[\d]+/', '', $taglist);
    $taglist = preg_replace('/[\r\n\t -]+/', '-', $taglist);

    return trim($taglist, ',-_');
}

/**
 * Ask the remote keyword-extraction service for the top keywords of a text.
 *
 * @param string $keys Text to analyse (sent as "text").
 * @param int    $num  Number of keywords requested (sent as "topk").
 * @return string Raw response body, or 'rEr' when the request failed or came back empty.
 */
function wp_aatags_keycontents($keys, $num) {
    // curlPost() form-encodes the fields, so no "Content-Type: application/json"
    // header is passed: it would mislabel the body, and as a third argument it
    // would land in the $timeout parameter anyway.
    $request = curlPost(
        'https://cws.9sep.org/extract/json',
        array('text' => $keys, 'topk' => $num)
    );

    // curlPost() returns the body as a string (false on a transport failure),
    // not a WordPress-style response array.
    if (! is_string($request) || $request === '') {
        return 'rEr';
    }

    return $request;
}

// Sample input for the demo calls below.
$content = '内容';

// Request one keyword for the text after it has gone through the HTML-to-text conversion.
$body = wp_aatags_keycontents(
    wp_aatags_html2text($content),
    1
);

// Request three keywords for the unconverted text; the return value is not kept.
wp_aatags_keycontents($content, 3);

?>

 

<?php


/**
 * Retrieve only the body from the raw response.
 *
 * Values that cannot be indexed like an array (an error object, false, null,
 * a plain string, ...) yield an empty string instead of raising
 * "Cannot use object of type ... as array".
 *
 * @since 2.7.0
 *
 * @param array|WP_Error $response HTTP response.
 * @return string The body of the response. Empty string if no body or incorrect parameter given.
 */
function wp_remote_retrieve_body( $response ) {
    // Only arrays and ArrayAccess implementations may be read with ['body'];
    // isset() on any other object throws an Error.
    $indexable = is_array( $response ) || ( $response instanceof ArrayAccess );
    if ( ! $indexable ) {
        return '';
    }

    if ( ! isset( $response['body'] ) ) {
        return '';
    }

    return $response['body'];
}


/**
* CURL GET数据
* @param string $url 访问地址
* @param integer $timeout 超时秒
* @param string $header 头信息
* @return string
*/
/**
 * Send an HTTP POST request with cURL and return the raw response body.
 *
 * The payload is encoded with http_build_query(), i.e. it is sent as
 * application/x-www-form-urlencoded.
 *
 * @param  string       $url       Target URL.
 * @param  array        $post_data Fields to send.
 * @param  integer      $timeout   Timeout in seconds, used for both the connect phase and the
 *                                 whole transfer. Non-numeric or non-positive values fall back to 100.
 * @param  string|array $header    Extra request header line(s), e.g. "Accept: application/json".
 * @return string|false Response body, or false when the transfer failed.
 */
function curlPost($url, $post_data=array(), $timeout=100, $header="") {
    // Be tolerant of callers that hand over something unusable as a timeout:
    // a request without any timeout can block the script forever.
    $seconds = 100;
    if (is_numeric($timeout) && $timeout > 0) {
        $seconds = max(1, (int) ceil((float) $timeout));
    }

    // Accept one header line or a list of them; drop blanks so cURL is never
    // asked to send an empty header line.
    $headers = array();
    foreach ((array) $header as $line) {
        if (is_string($line) && trim($line) !== '') {
            $headers[] = trim($line);
        }
    }

    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($post_data));
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    // Keep certificate and host-name verification on; switching them off
    // exposes the request to man-in-the-middle interception.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);

    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $seconds);
    curl_setopt($ch, CURLOPT_TIMEOUT, $seconds);

    // HTTP_HOST is not defined when running from the command line.
    if (!empty($_SERVER['HTTP_HOST'])) {
        curl_setopt($ch, CURLOPT_REFERER, $_SERVER['HTTP_HOST']);
    }
    if (count($headers) > 0) {
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
    }

    $result = curl_exec($ch);
    if ($result === false) {
        // Report failures to the error log rather than echoing debug markup
        // into the page output.
        error_log(sprintf(
            'curlPost: request to %s failed (cURL error %d: %s)',
            $url,
            curl_errno($ch),
            curl_error($ch)
        ));
    }
    curl_close($ch);

    return $result;
}


/**
 * Reduce an HTML fragment to plain UTF-8 text.
 *
 * Script elements and all remaining tags are removed, whitespace following a
 * line break is collapsed, and a fixed set of HTML entities is decoded.
 * Decoded symbols are emitted as UTF-8 byte sequences so the result is a
 * consistently encoded string.
 *
 * @param string $ep HTML input.
 * @return string Plain text.
 */
function wp_aatags_html2text($ep) {
    // Pattern => replacement, applied in this order.
    $rules = array(
        '~<script[^>]*?>.*?</script>~si' => '',
        '~<[\/\!]*?[^<>]*?>~si'          => '',
        '~([\r\n])[\s]+~'                => '$1',
        '~&(quot|#34|#034|#x22);~i'      => '"',
        '~&(lt|#60|#060|#x3c);~i'        => '<',
        '~&(gt|#62|#062|#x3e);~i'        => '>',
        '~&(nbsp|#160|#xa0);~i'          => ' ',
        '~&(iexcl|#161);~i'              => "\xC2\xA1",     // inverted exclamation mark
        '~&(cent|#162);~i'               => "\xC2\xA2",     // cent sign
        '~&(pound|#163);~i'              => "\xC2\xA3",     // pound sign
        '~&(copy|#169);~i'               => "\xC2\xA9",     // copyright sign
        '~&(reg|#174);~i'                => "\xC2\xAE",     // registered sign
        '~&(deg|#176);~i'                => "\xC2\xB0",     // degree sign
        '~&(#39|#039|#x27);~'            => "'",
        '~&(euro|#8364);~i'              => "\xE2\x82\xAC", // euro sign
        '~&a(uml|UML);~'                 => "\xC3\xA4",     // a with diaeresis
        '~&o(uml|UML);~'                 => "\xC3\xB6",     // o with diaeresis
        '~&u(uml|UML);~'                 => "\xC3\xBC",     // u with diaeresis
        '~&A(uml|UML);~'                 => "\xC3\x84",     // A with diaeresis
        '~&O(uml|UML);~'                 => "\xC3\x96",     // O with diaeresis
        '~&U(uml|UML);~'                 => "\xC3\x9C",     // U with diaeresis
        '~&szlig;~i'                     => "\xC3\x9F",     // sharp s
        // Ampersand is decoded last so that an escaped entity such as
        // "&amp;lt;" becomes "&lt;" and is not decoded a second time.
        '~&(amp|#38|#038|#x26);~i'       => '&',
    );

    return preg_replace(array_keys($rules), array_values($rules), $ep);
}

/**
 * Turn raw tag text into a hyphen-separated string.
 *
 * Steps, in order: non-breaking spaces become plain spaces; "%20" and "+"
 * become hyphens; the punctuation listed in $special_chars is removed; all
 * digits are removed; runs of whitespace and hyphens collapse into a single
 * "-"; leading and trailing ",", "-" and "_" are trimmed.
 *
 * @param string $taglist Raw tag text.
 * @return string Sanitized tag text.
 */
function wp_aatags_sanitize($taglist) {
    // Characters removed outright: ASCII punctuation plus CJK and full-width
    // punctuation. The non-ASCII entries replace list slots that had been
    // reduced to empty strings, which str_replace() silently ignores.
    $special_chars = array(
        '?', '!', ':', '.', ';', '[', ']', '/', '\\', '=', '<', '>',
        '\'', '"', '&', '$', '#', '*', '(', ')', '|', '~', '`', '{', '}', '%',
        '、', '。', '“', '”', '《', '》', '!', ',', ':', '?',
        chr(0),
    );

    // With the /u modifier preg_replace() returns null for input that is not
    // valid UTF-8; keep the text unchanged in that case.
    $spaced = preg_replace('#\x{00a0}#siu', ' ', $taglist);
    if ($spaced !== null) {
        $taglist = $spaced;
    }

    // Encoded spaces must be converted before "%" is stripped, otherwise
    // "%20" could never match.
    $taglist = str_replace(array('%20', '+'), '-', $taglist);
    $taglist = str_replace($special_chars, '', $taglist);
    $taglist = preg_replace('/[\d]+/', '', $taglist);
    $taglist = preg_replace('/[\r\n\t -]+/', '-', $taglist);

    return trim($taglist, ',-_');
}

/**
 * Ask the remote keyword-extraction service for the top keywords of a text.
 *
 * @param string $keys Text to analyse (sent as "text").
 * @param int    $num  Number of keywords requested (sent as "topk").
 * @return string Raw response body, or 'rEr' when the request failed or came back empty.
 */
function wp_aatags_keycontents($keys, $num) {
    // curlPost() form-encodes the fields, so no "Content-Type: application/json"
    // header is passed: it would mislabel the body, and as a third argument it
    // would land in the $timeout parameter anyway.
    $request = curlPost(
        'https://cws.9sep.org/extract/json',
        array('text' => $keys, 'topk' => $num)
    );

    // curlPost() returns the body as a string (false on a transport failure),
    // not a WordPress-style response array.
    if (! is_string($request) || $request === '') {
        return 'rEr';
    }

    return $request;
}

// Sample input for the demo calls below.
$content = '内容';

// Request one keyword for the text after it has gone through the HTML-to-text conversion.
$body = wp_aatags_keycontents(
    wp_aatags_html2text($content),
    1
);

// Request three keywords for the unconverted text; the return value is not kept.
wp_aatags_keycontents($content, 3);

?>
一个96年的PHPER

更多文章请关注《万象专栏》