Spaces:
Runtime error
Runtime error
import logging
import os
from argparse import ArgumentParser

from pyserini.analysis import JWhiteSpaceAnalyzer
from pyserini.search import SimpleSearcher
from transformers import AutoTokenizer
# Name the logger after this script's file name instead of ``__name__``,
# which is just "__main__" when the file is executed as a script.
logger = logging.getLogger(os.path.basename(__file__))
logger.setLevel(logging.INFO)
def main(args):
    """Search an index for a query and print the ranked hits to stdout.

    Args:
        args: Parsed command-line namespace. Reads ``args.query`` (the query
            string), ``args.index`` (passed to ``SimpleSearcher``) and
            ``args.do_tokenize`` (when truthy, the query is tokenized with
            the multilingual BERT tokenizer before searching).

    Each hit is printed on its own line as ``<rank> <docid> <score>``, with
    ranks starting at 1 and at most 1000 hits requested.
    """
    query = args.query

    if args.do_tokenize:
        # The Hugging Face Hub id is "bert-base-multilingual-uncased"; the
        # former "bert-multilingual-base-uncased" does not name a model, so
        # from_pretrained() failed whenever tokenization was enabled.
        tokenizer = AutoTokenizer.from_pretrained('bert-base-multilingual-uncased')
        # Re-join the word pieces with single spaces so the whitespace
        # analyzer set below splits the query back into the same tokens.
        query = " ".join(tokenizer.tokenize(query))

    logger.info('searching for: %s', query)

    searcher = SimpleSearcher(args.index)
    # Whitespace-only analysis: query terms are matched exactly as given.
    # NOTE(review): assumes the index was built from pre-tokenized text - confirm.
    searcher.set_analyzer(JWhiteSpaceAnalyzer())
    hits = searcher.search(query, 1000)

    for rank, hit in enumerate(hits, start=1):
        print(f'{rank:2} {hit.docid:4} {hit.score:.5f}')
def build_parser():
    """Build the command-line parser for this script.

    Returns:
        An ``ArgumentParser`` exposing ``query`` and ``index`` (both
        required) and the boolean ``do_tokenize``, which defaults to True.
    """
    parser = ArgumentParser()
    parser.add_argument('--query', '-q', type=str, required=True, help="The query to search in the index")
    parser.add_argument('--index', '-i', type=str, required=True, help="Path to the anserini index directory")
    # The flag used to be declared with action='store_false', so passing
    # --do-tokenize actually turned tokenization OFF, contradicting its name
    # and help text. Tokenization stays on by default (as before); the flag
    # now affirms it, and --no-tokenize is the explicit way to switch it off.
    parser.add_argument('--do-tokenize', '-t', dest='do_tokenize', action='store_true', default=True,
                        help="Perform mbert tokenization on the query (default)")
    parser.add_argument('--no-tokenize', dest='do_tokenize', action='store_false',
                        help="Do not perform mbert tokenization on the query")
    return parser


if __name__ == "__main__":
    main(build_parser().parse_args())