Skip to content

Commit 59a8e75

Browse files
committed
remove inverted index feature
1 parent 7842ea9 commit 59a8e75

29 files changed

+146
-975
lines changed

.travis.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
language: php
22

33
php:
4-
- 5.5
5-
- 5.6
64
- 7.0
5+
- 7.1
76
- hhvm
87

98
before_script:

README.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,25 +16,36 @@ composer require yooper/php-text-analysis
1616
Documentation for the library resides in the wiki.
1717
https://github.com/yooper/php-text-analysis/wiki
1818

19+
20+
21+
1922
Dictionary Installation
2023
=============
2124

2225
Not required unless you use the dictionary stemmers
2326

24-
*For Ubuntu*
27+
*For Ubuntu < 16*
2528
```
2629
sudo apt-get install libpspell-dev
2730
sudo apt-get install php5-pspell
2831
sudo apt-get install aspell-en
2932
sudo apt-get install php5-enchant
3033
```
34+
*For Ubuntu >= 16*
35+
```
36+
sudo apt-get install libpspell-dev php7.0-pspell aspell-en php7.0-enchant
37+
```
38+
39+
3140
*For CentOS*
3241
```
3342
sudo yum install php5-pspell
3443
sudo yum install aspell-en
3544
sudo yum install php5-enchant
3645
```
3746

47+
The *PECL stem* extension is not currently available for PHP 7.0.
48+
3849

3950
Tokenize
4051
=============

composer.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
"symfony/console": ">=2.7"
3030
},
3131
"require-dev": {
32-
"phpunit/phpunit": "4.*",
33-
"mockery/mockery" : "0.9.4"
32+
"phpunit/phpunit": "5.*",
33+
"mockery/mockery" : "0.9.7"
3434
}
3535
}

src/Console/Commands/NltkPackageInstallCommand.php

Lines changed: 78 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,30 @@
66
use Symfony\Component\Console\Input\InputArgument;
77
use Symfony\Component\Console\Input\InputInterface;
88
use Symfony\Component\Console\Output\OutputInterface;
9+
use Symfony\Component\Console\Helper\ProgressBar;
910

1011
use TextAnalysis\Downloaders\DownloadPackageFactory as DPF;
1112
use TextAnalysis\Downloaders\NltkCorporaIndexDownloader;
1213

1314

15+
1416
/**
1517
* Installs the selected nltk corpus package
1618
*
1719
* @author yooper
1820
*/
1921
class NltkPackageInstallCommand extends Command
2022
{
23+
/**
24+
* @var ProgressBar
25+
*/
26+
protected $progressBar = null;
27+
28+
/**
29+
* @var \Symfony\Component\Console\Output\OutputInterface
30+
*/
31+
private $output;
32+
2133
protected function configure()
2234
{
2335
$this->setName('pta:install:package')
@@ -31,25 +43,88 @@ protected function configure()
3143

3244
/**
 * Looks up the nltk package named by the 'package' argument, downloads it
 * with a progress bar, verifies its checksum and unpacks it.
 *
 * @param InputInterface $input
 * @param OutputInterface $output
 * @return int 0 when the package was installed, 1 when any step failed
 */
protected function execute(InputInterface $input, OutputInterface $output)
{
    $this->output = $output;
    // Start every run without a bar; progress() creates one once data arrives.
    $this->progressBar = null;
    $packageId = $input->getArgument('package');

    $listPackages = (new NltkCorporaIndexDownloader())->getPackages();

    $packageFound = null;
    foreach ($listPackages as $package) {
        if ($packageId == $package->getId()) {
            $packageFound = $package;
            break;
        }
    }

    if (!$packageFound) {
        $output->writeln("Package {$packageId} was not found, try textconsole pta:list, to see the available packages");
        return 1;
    }

    $download = DPF::download($packageFound);

    // Create stream context, progress() receives the download notifications.
    $context = stream_context_create([], ['notification' => [$this, 'progress']]);

    // Open the remote file; without it there is nothing to copy.
    $resource = fopen($packageFound->getUrl(), 'rb', false, $context);
    if (!$resource) {
        $output->writeln("Package {$packageFound->getId()} - {$packageFound->getName()} install failed, could not open {$packageFound->getUrl()}");
        return 1;
    }

    $stream = fopen($download->getDownloadFullPath(), 'w+b');
    if (!$stream) {
        fclose($resource);
        $output->writeln("Package {$packageFound->getId()} - {$packageFound->getName()} install failed, permission denied to create file into {$download->getDownloadFullPath()}");
        return 1;
    }

    // Pipe file, then release both handles whatever the outcome was.
    $copied = stream_copy_to_stream($resource, $stream);
    fclose($resource);
    $saved = fclose($stream);

    // End output. The bar only exists if at least one notification fired.
    if ($this->progressBar) {
        $this->progressBar->finish();
    }

    if (false === $copied || !$saved) {
        $output->writeln("Could not save file {$download->getDownloadFullPath()}");
        return 1;
    }

    if (!$download->verifyChecksum()) {
        $output->writeln("Bad checksum for the downloaded package {$packageFound->getId()}");
        // Return a failure code instead of exit, which would end the process with status 0.
        return 1;
    }

    $download->unpackPackage();
    $output->writeln(PHP_EOL);
    $output->writeln("Package {$packageFound->getId()} - {$packageFound->getName()} was installed into {$download->getInstallDir()}");

    return 0;
}
94+
95+
/**
96+
* @param int $notificationCode
97+
* @param int $severity
98+
* @param string $message
99+
* @param int $messageCode
100+
* @param int $bytesTransferred
101+
* @param int $bytesMax
102+
*/
103+
public function progress($notificationCode, $severity, $message, $messageCode, $bytesTransferred, $bytesMax)
104+
{
105+
if (STREAM_NOTIFY_REDIRECTED === $notificationCode) {
106+
$this->progressBar->clear();
107+
$this->progressBar = null;
108+
return;
109+
}
110+
111+
if (STREAM_NOTIFY_FILE_SIZE_IS === $notificationCode) {
112+
if ($this->progressBar) {
113+
$this->progressBar->clear();
114+
}
115+
$this->progressBar = new ProgressBar($this->output, $bytesMax);
116+
}
117+
118+
if (STREAM_NOTIFY_PROGRESS === $notificationCode) {
119+
if (is_null($this->progressBar)) {
120+
$this->progressBar = new ProgressBar($this->output);
121+
}
122+
$this->progressBar->setProgress($bytesTransferred);
123+
}
124+
125+
if (STREAM_NOTIFY_COMPLETED === $notificationCode) {
126+
$this->finish($bytesTransferred);
127+
}
128+
}
129+
55130
}

src/Console/Commands/StemmerExceptionListCommand.php

Lines changed: 0 additions & 101 deletions
This file was deleted.

src/Downloaders/DownloadPackageFactory.php

Lines changed: 5 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,7 @@ protected function __construct(Package $package)
3131
if(file_exists($this->getDownloadFullPath()) && $this->verifyChecksum()) {
3232
return;
3333
}
34-
35-
$this->downloadRemoteFile();
36-
37-
if($this->verifyChecksum()) {
38-
throw new Exception("Bad checksum for the downloaded package {$this->getPackage()->getId()}");
39-
}
40-
41-
$this->unpackPackage();
34+
4235
}
4336

4437
/**
@@ -52,22 +45,24 @@ static public function download(Package $package)
5245
}
5346

5447
/**
 * Verify the package's checksum against the downloaded file,
 * if the package has a checksum
 *
 * A package without a checksum is always treated as valid.
 *
 * @return boolean true when there is no checksum or the md5 of the file matches it
 */
public function verifyChecksum()
{
    $expected = $this->getPackage()->getChecksum();
    if(empty($expected)) {
        return true;
    }

    $path = $this->getDownloadFullPath();
    // Nothing on disk yet means there is nothing that could match.
    if(!is_file($path)) {
        return false;
    }

    // Hash the file contents; md5() of the path string would hash the name, not the data.
    return $expected === md5_file($path);
}
6560

6661
/**
6762
* de-compress the downloaded corpus into the install directory, or
6863
* copy the files into the install directory
6964
*/
70-
protected function unpackPackage()
65+
public function unpackPackage()
7166
{
7267
// it is zipped, we must unzip it
7368
if($this->getPackage()->getUnzip()) {
@@ -150,21 +145,6 @@ public function initialize()
150145

151146
}
152147

153-
/**
154-
* @todo improve downloader code, make it more robust
155-
*/
156-
protected function downloadRemoteFile()
157-
{
158-
$handle = fopen($this->getPackage()->getUrl(), "rb");
159-
$fp = fopen($this->getDownloadFullPath(), 'w');
160-
$content = '';
161-
while (!feof($handle)) {
162-
$content = fread($handle, 8192);
163-
fwrite($fp, $content);
164-
}
165-
fclose($handle);
166-
fclose($fp);
167-
}
168148

169149
/**
170150
* Has the full path to where the download should go

src/Downloaders/NltkCorporaIndexDownloader.php

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ public function getPackages()
5252
{
5353
$data = (array)$package;
5454
extract($data['@attributes']);
55+
// checksums may not exist on some remote packages
56+
if(!isset($checksum)) {
57+
$checksum = null;
58+
}
5559
$this->packages[] = new Package($id, $checksum, $name, $subdir, $unzip, $url);
5660
}
5761
}

0 commit comments

Comments
 (0)