<?php

/**
 * Text keywords extraction.
 *
 * Copyright © 2023 Vitaly Buzin. All rights reserved.
 * See LICENSE.txt for license details.
 */
declare( strict_types=1 );

// Stop immediately when the ABSPATH constant is not defined, i.e. when this file
// is requested on its own rather than being loaded by the host application
// (presumably WordPress, which defines ABSPATH during bootstrap).
if ( ! defined( 'ABSPATH' ) ) exit; // Exit if accessed directly

/**
 * Counts how often each word occurs in the texts passed to extract() and
 * reports the most frequent ones, ignoring a configurable list of stop words.
 */
class Epnab_Keywords {
	/**
	 * Occurrence counters keyed by lower-cased word.
	 *
	 * PHP stores numeric-looking string keys (e.g. "2023") as integers.
	 *
	 * @var array<int|string, int>
	 */
	protected array $keywords = [];
	/**
	 * Lower-cased words that must never be counted as keywords.
	 *
	 * @var string[]
	 */
	protected array $stopwords = [];

	/**
	 * Tokenizes the text and adds every non stop word to the keyword counters.
	 *
	 * Counters accumulate across calls, so several texts can be fed one after another.
	 *
	 * @param string $text Raw text to analyse.
	 *
	 * @return void
	 */
	public function extract( string $text ): void {
		$words = mb_split( '\s+', mb_strtolower( $this->sanitize( $text ) ) );
		if ( ! is_array( $words ) ) {
			// mb_split() returns false on failure; there is nothing to count then.
			return;
		}

		// Flip once so each membership test is a strict, O(1) key lookup instead of a
		// loose in_array() scan (which would treat e.g. "1e3" and "1000" as equal).
		$stopwords = array_flip( $this->stopwords );

		foreach ( $words as $word ) {
			$word = trim( $word );
			// Skip empty tokens and tokens made only of the punctuation that sanitize()
			// lets through, such as the dash in "foo - bar".
			if ( '' === trim( $word, '-_' ) || isset( $stopwords[ $word ] ) ) {
				continue;
			}
			$this->keywords[ $word ] = ( $this->keywords[ $word ] ?? 0 ) + 1;
		}
	}

	/**
	 * Replaces the stop word list.
	 *
	 * Entries are lower-cased, trimmed and de-duplicated; blank entries are dropped.
	 *
	 * @param string $words     Stop words joined by $separator.
	 * @param string $separator Delimiter between the words. An empty string means
	 *                          "split on whitespace".
	 *
	 * @return void
	 */
	public function setStopwords( string $words, string $separator ): void {
		$lowered = mb_strtolower( $words );

		// explode() throws a ValueError for an empty separator, so fall back to
		// whitespace splitting in that case.
		$parts = '' === $separator ? mb_split( '\s+', $lowered ) : explode( $separator, $lowered );
		if ( ! is_array( $parts ) ) {
			$parts = [];
		}

		// "foo, bar" split on "," leaves " bar"; without trimming it would never match a word.
		$parts = array_filter(
			array_map( 'trim', $parts ),
			static fn( string $part ): bool => '' !== $part
		);

		$this->stopwords = array_values( array_unique( $parts ) );
	}

	/**
	 * Returns the collected keywords ordered by descending number of occurrences.
	 *
	 * @param int|null $limit Maximum number of keywords to return; null returns all of them.
	 *
	 * @return string[]
	 */
	public function getTop( ?int $limit = null ): array {
		arsort( $this->keywords, SORT_NUMERIC );
		// preserve_keys must stay true: the words are the keys, and numeric ones
		// would otherwise be renumbered by array_slice().
		$top = array_slice( $this->keywords, 0, $limit, true );

		// Numeric words are stored as integer keys; cast back so callers always get strings.
		return array_map( 'strval', array_keys( $top ) );
	}

	/**
	 * Replaces every character that is not a letter, a digit, an underscore,
	 * whitespace or a hyphen with a single space.
	 *
	 * @param string $text
	 *
	 * @return string Cleaned text, or an empty string when the replacement fails.
	 */
	protected function sanitize( string $text ): string {
		$clean = mb_eregi_replace( '[^\p{L}\p{N}\_\s\-]', ' ', $text );

		// mb_eregi_replace() yields false/null on failure, which would violate the
		// declared string return type under strict_types.
		return is_string( $clean ) ? $clean : '';
	}
}