FileIndexer.cpp

Go to the documentation of this file.
00001 /*
00002 
00003 Copyright (C) 2005-2007 by Peter Dimov.
00004 
00005 This file is part of Calitko (http://www.calitko.org).
00006 
00007 Calitko is free software; you can redistribute it and/or modify
00008 it under the terms of the GNU General Public License as published by
00009 the Free Software Foundation; either version 2 of the License, or
00010 (at your option) any later version.
00011 
00012 Calitko is distributed in the hope that it will be useful,
00013 but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 GNU General Public License for more details.
00016 
00017 You should have received a copy of the GNU General Public License
00018 along with Calitko; if not, write to the Free Software
00019 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00020 
00021 */
00022 
00023 #include "Qt.h"
00024 #include "FileIndexer.h"
00025 #include "FileInfo.h"
00026 //#include "Gnutella/PacketProcessing/QueryRoutingFilter.h"
00027 
00028 using Gnutella::Searching::FileIndexer;
00029 using Gnutella::Searching::FileInfo;
00030 //using Gnutella::PacketProcessing::QueryRoutingFilter;
00031 
// Period between two automatic refreshes of the file index. The constructor
// multiplies it by 60000 before handing it to QTimer::setInterval(), i.e. the
// value is expressed in minutes. A typed constant is used instead of a macro
// so the name obeys normal scoping rules.
static const int REFRESH_INTERVAL_MIN = 100;
00033 
00034 QDataStream & operator<< (QDataStream &stream, QMap<uint, FileInfo*> &files)
00035 {
00036     using namespace Gnutella::Searching;
00037 
00038     QList<uint> fileIndexes = files.keys();
00039 
00040     stream << fileIndexes.count();
00041 
00042     foreach (uint fileIndex, fileIndexes) {
00043         stream << fileIndex << *(files.value (fileIndex));
00044     }
00045     return stream;
00046 }
00047 
00048 QDataStream & operator>> (QDataStream &stream, QMap<uint, FileInfo*> &files)
00049 {
00050     using namespace Gnutella::Searching;
00051 
00052     uint fileIndex;
00053     int numEntries;
00054 
00055     stream >> numEntries;
00056 
00057 
00058     for (int i=0; i<numEntries; i++) {
00059         FileInfo *fileInfo = new FileInfo();
00060         stream >> fileIndex >> *fileInfo;
00061 //      qDebug() << fileInfo->fileName();
00062 //      qDebug() << fileInfo->fileIndex();
00063         files.insert (fileIndex, fileInfo);
00064     }
00065     return stream;
00066 }
00067 
00068 QDataStream & operator<< (QDataStream &stream, QMap<QString, FileInfo*> &fileWords)
00069 {
00070     QList<QString> words = fileWords.keys().toSet().toList();       // take the words without repetitions
00071     QList<FileInfo*> fileInfos;
00072 
00073     stream << words.count();
00074 
00075     foreach (QString word, words) {
00076         fileInfos = fileWords.values (word);
00077 
00078         stream << word;
00079         stream << fileInfos.count();
00080 
00081         foreach (FileInfo* fileInfo, fileInfos) {
00082             stream << fileInfo->fileIndex();
00083 //          qDebug() << fileInfo->fileName();
00084 //          qDebug() << fileInfo->fileIndex();
00085         }
00086     }
00087 
00088     return stream;
00089 }
00090 
00091 QDataStream& FileIndexer::readFileWords (QDataStream &stream)
00092 {
00093     int numWords;
00094     stream >> numWords;
00095 
00096     int numInfos;
00097     uint fileIndex;
00098     QString word;
00099     for (int i=0; i<numWords; i++) {
00100         stream >> word;
00101         stream >> numInfos;
00102 
00103 //      qDebug() << word;
00104         //p.queryRoutingFilter->addToQueryRoutingTable (word);
00105 
00106         for (int j=0; j<numInfos; j++) {
00107             stream >> fileIndex;
00108             Q_ASSERT (p.files.contains (fileIndex));
00109             p.fileWords.insertMulti (word, p.files.value (fileIndex));
00110 
00111 //          qDebug() << p.files.value (fileIndex)->fileName();
00112         }
00113     }
00114 
00115     return stream;
00116 }
00117 
00118 
00119 QDataStream & operator<< (QDataStream &stream, QList<QDir> &dirs)
00120 {
00121     stream << dirs.count();
00122 
00123     foreach (QDir dir, dirs) {
00124         stream << dir.absolutePath();
00125     }
00126 
00127     return stream;
00128 }
00129 
00130 QDataStream & operator>> (QDataStream &stream, QList<QDir> &dirs)
00131 {
00132     int numDirs;
00133     stream >> numDirs;
00134 
00135     QString dirPath;
00136     for (int i=0; i<numDirs; i++) {
00137         stream >> dirPath;
00138 //      qDebug() << dirPath;
00139         dirs.append (QDir (dirPath));
00140     }
00141 
00142     return stream;
00143 }
00144 
00145 FileIndexer::FileIndexer (/*QueryRoutingFilter *queryRoutingFilter*/)
00146  : p()
00147 {
00148     p.checkThread = new CheckIndexThread (this);
00149 
00150     p.refreshTimer = new QTimer (this);
00151     QObject::connect (p.refreshTimer, SIGNAL (timeout()), p.checkThread, SLOT(start()));
00152     p.refreshTimer->setInterval (REFRESH_INTERVAL_MIN * 60000);
00153     p.refreshTimer->start();
00154     //p.queryRoutingFilter = queryRoutingFilter;
00155 
00156     QFile file ("fileindex.dat");
00157 
00158     if (!file.open (QIODevice::ReadOnly))
00159         return;
00160 
00161     QDataStream stream (&file);
00162 
00163     stream >> p.files;
00164     stream >> p.sharedFolders;
00165     readFileWords (stream);
00166 
00167     p.checkThread->start();
00168     //refreshIndex();
00169 }
00170 
00171 FileIndexer::~FileIndexer()
00172 {
00173     p.refreshTimer->stop();
00174     delete p.refreshTimer;
00175     // <sm_todo> write the index to a file
00176 
00177     QFile file ("fileindex.dat");
00178 
00179     if (!file.open (QIODevice::WriteOnly))
00180         return;
00181 
00182     QDataStream stream (&file);
00183 
00184     stream << p.files;
00185     stream << p.sharedFolders;
00186     stream << p.fileWords;
00187 
00188 }
00189 
00190 void FileIndexer::refreshIndex()
00191 {
00192     QTime time;
00193     time.start();
00194     QList<uint> fileIndexes;
00195 
00196     foreach (QDir sharedDir, p.sharedFolders) {
00197         sharedDir.refresh();
00198         qDebug() << sharedDir.absolutePath();
00199         if (!QFileInfo(sharedDir.absolutePath()).exists()) {
00200             removeFolder (sharedDir.absolutePath());
00201             continue;
00202         }
00203 
00204         QList<QString> fileEntries = sharedDir.entryList (QDir::Files, QDir::Name);
00205 
00206         foreach (QString fileName, fileEntries) {
00207     //      qDebug() << sharedDir.absoluteFilePath (fileName);
00208             FileInfo *fileInfo = new FileInfo (sharedDir.absoluteFilePath (fileName));
00209 
00210             fileIndexes.append (fileInfo->fileIndex());
00211 
00212             if (!p.files.value (fileInfo->fileIndex())) {
00213                 indexFile (fileInfo);
00214             } else {
00215                 delete fileInfo;
00216             }
00217 
00218         }
00219     }
00220 
00221     QList<uint> filesToRemove = (p.files.keys().toSet().subtract (fileIndexes.toSet())).toList();
00222 
00223     foreach (uint fileIndex, filesToRemove) {
00224         removeFile (p.files.value (fileIndex));
00225     }
00226 
00227     checkForAddedFolder();
00228     qDebug() << "Refreshing file index finished!  " << time.elapsed() << " ms";
00229 }
00230 
00231 void FileIndexer::checkForAddedFolder()
00232 {
00233     foreach (QDir sharedDir, p.sharedFolders) {
00234         QList<QString> dirEntries = sharedDir.entryList (QDir::Dirs | QDir::NoSymLinks, QDir::Name);
00235 
00236         qDebug() << sharedDir.absolutePath();
00237         Q_ASSERT (sharedDir.exists());
00238         dirEntries.pop_front();     // .
00239         dirEntries.pop_front();     // ..
00240 
00241         foreach (QString dir, dirEntries) {
00242             if (!p.sharedFolders.contains (QDir (sharedDir.absoluteFilePath (dir)))) {
00243                 indexFolder (sharedDir.absoluteFilePath (dir));
00244             }
00245         }
00246     }
00247 }
00248 
00249 bool FileIndexer::indexFolder (QString folderPath)
00250 {
00251     QDir sharedDir (folderPath);
00252 
00253     if (!sharedDir.exists()) {
00254         return false;
00255     }
00256 
00257     if (p.sharedFolders.contains (sharedDir)) {
00258         return true;
00259     }
00260 
00261     qDebug() <<sharedDir.absolutePath();
00262     p.sharedFolders.append (sharedDir);
00263 
00264     QList<QString> fileEntries = sharedDir.entryList (QDir::Files, QDir::Name);
00265 
00266     foreach (QString fileName, fileEntries) {
00267         FileInfo *fileInfo = new FileInfo (sharedDir.absoluteFilePath (fileName));
00268         indexFile (fileInfo);
00269     }
00270 
00271     QList<QString> dirEntries = sharedDir.entryList (QDir::Dirs | QDir::NoSymLinks, QDir::Name);
00272 
00273     dirEntries.pop_front();     // .
00274     dirEntries.pop_front();     // ..
00275 
00276     foreach (QString dirName, dirEntries) {
00277         indexFolder (sharedDir.absoluteFilePath (dirName));
00278     }
00279 
00280     return true;
00281 }
00282 
00283 void FileIndexer::indexFile (QString filePath)
00284 {
00285     if (! QFile::exists (filePath)) {
00286         return;
00287     }
00288 
00289     FileInfo *fileInfo = new FileInfo (filePath);
00290 
00291     indexFile (fileInfo);
00292 }
00293 
00294 void FileIndexer::indexFile (FileInfo *fileInfo)
00295 {
00296     Q_ASSERT (fileInfo->exists());
00297 
00298     p.files.insert (fileInfo->fileIndex(), fileInfo);
00299 
00300     QString fileName = QFileInfo (fileInfo->fileName()).fileName();
00301     QList<QString> fileNameWords = fileName.toLower().split (QRegExp ("(\\s+|-|_|\\W)+"));
00302 
00303     foreach (QString word, fileNameWords) {
00304         if (word.isEmpty()) {
00305             continue;
00306         }
00307         p.fileWords.insertMulti (word, fileInfo);
00308         //p.queryRoutingFilter->addToQueryRoutingTable (word);
00309     }
00310 }
00311 
00312 bool FileIndexer::removeFolder (QString folderPath)
00313 {
00314     QDir sharedDirToBeRemoved (folderPath);
00315 
00316     p.sharedFolders.removeAt (p.sharedFolders.indexOf (sharedDirToBeRemoved));
00317     removeFolderFiles (folderPath);
00318 
00319     if (!sharedDirToBeRemoved.exists()) {
00320         return true;
00321     }
00322 
00323     QList<QString> dirEntries = sharedDirToBeRemoved.entryList (QDir::Dirs | QDir::NoSymLinks, QDir::Name);
00324 
00325     dirEntries.pop_front();
00326     dirEntries.pop_front();
00327 
00328     foreach (QString dirName, dirEntries) {
00329         removeFolder (sharedDirToBeRemoved.absoluteFilePath (dirName));
00330     }
00331 
00332     return true;
00333 }
00334 
00335 void FileIndexer::removeFolderFiles (QString folderPath)
00336 {
00337     QList<QString> words = p.fileWords.keys();
00338 
00339     foreach (QString word, words) {
00340         QList<FileInfo*> fileInfosForWord = p.fileWords.values (word);
00341 
00342         p.fileWords.remove (word);
00343 
00344         foreach (FileInfo *fileInfo, fileInfosForWord) {
00345             if (QFileInfo (fileInfo->fileName()).absoluteDir().absolutePath() == folderPath) {
00346                 removeFile (fileInfo);
00347             } else {
00348                 p.fileWords.insertMulti (word, fileInfo);
00349             }
00350         }
00351     }
00352 }
00353 
00354 void FileIndexer::removeFile (FileInfo *fileInfo)
00355 {
00356 //  Q_ASSERT (fileInfo->exists());
00357 
00358     QString fileName = QFileInfo (fileInfo->fileName()).fileName();
00359     QList<QString> fileNameWords = fileName.toLower().split (QRegExp ("(\\s+|-|_|\\W)+"));
00360 
00361     foreach (QString word, fileNameWords) {
00362         if (word.isEmpty()) {
00363             continue;
00364         }
00365         removeFileFromWord (fileInfo->fileIndex(), word);
00366     }
00367 
00368     p.files.remove (fileInfo->fileIndex());
00369     delete fileInfo;
00370 }
00371 
00372 void FileIndexer::removeFileFromWord (uint fileIndex, QString &word)
00373 {
00374     QList<FileInfo*> fileInfosForWord = p.fileWords.values (word);
00375 
00376     QList<FileInfo*>::iterator it = fileInfosForWord.begin();
00377 
00378     for (;it<fileInfosForWord.end(); it++) {
00379         if ((*it)->fileIndex() == fileIndex) {
00380             fileInfosForWord.erase (it);
00381             break;
00382         }
00383     }
00384 
00385     p.fileWords.remove (word);
00386 
00387     if (fileInfosForWord.isEmpty()) {
00388         //p.queryRoutingFilter->removeFromQueryRoutingTable (word);
00389     } else {
00390         foreach (FileInfo *fileInfo, fileInfosForWord) {
00391             p.fileWords.insertMulti (word, fileInfo);
00392         }
00393     }
00394 }
00395 
00396 QList<FileInfo*> FileIndexer::match (QString searchCriteria) const
00397 {
00398     QList<FileInfo*> fileMatches;
00399     QList<FileInfo*> tempFileInfos;
00400 
00401     QList<QString> searchKeyWords = searchCriteria.split (QRegExp ("\\s+"));
00402 
00403     fileMatches = p.fileWords.values (searchKeyWords.first());
00404     searchKeyWords.pop_front();
00405 
00406     foreach (QString keyWord, searchKeyWords) {
00407         tempFileInfos = p.fileWords.values (keyWord);
00408         fileMatches = (fileMatches.toSet().intersect (tempFileInfos.toSet())).toList();
00409     }
00410 
00411     return fileMatches;
00412 }
00413 
00414 void FileIndexer::CheckIndexThread::run()
00415 {
00416     fileIndexer->refreshIndex();
00417 }
00418 FileIndexer::CheckIndexThread::CheckIndexThread (FileIndexer *fileIndexer)
00419  :  fileIndexer (fileIndexer)
00420 {
00421 }