Skip to content

Commit

Permalink
Release a Chinese MRP model with Mengzi PLM
Browse files Browse the repository at this point in the history
  • Loading branch information
hankcs committed Apr 15, 2022
1 parent 50184ee commit 3be983d
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 3 deletions.
10 changes: 9 additions & 1 deletion docs/references.bib
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
%% This BibTeX bibliography file was created using BibDesk.
%% https://bibdesk.sourceforge.io/
%% Created for hankcs at 2022-04-12 22:36:24 -0400
%% Created for hankcs at 2022-04-15 10:32:15 -0400
%% Saved with string encoding Unicode (UTF-8)
@article{zhang2021mengzi,
author = {Zhang, Zhuosheng and Zhang, Hanqing and Chen, Keming and Guo, Yuhang and Hua, Jingyun and Wang, Yulong and Zhou, Ming},
date-added = {2022-04-15 10:32:14 -0400},
date-modified = {2022-04-15 10:32:14 -0400},
journal = {arXiv preprint arXiv:2110.06696},
title = {Mengzi: Towards Lightweight yet Ingenious Pre-trained Models for Chinese},
year = {2021}}

@inproceedings{samuel-straka-2020-ufal,
abstract = {We present PERIN, a novel permutation-invariant approach to sentence-to-graph semantic parsing. PERIN is a versatile, cross-framework and language independent architecture for universal modeling of semantic structures. Our system participated in the CoNLL 2020 shared task, Cross-Framework Meaning Representation Parsing (MRP 2020), where it was evaluated on five different frameworks (AMR, DRG, EDS, PTG and UCCA) across four languages. PERIN was one of the winners of the shared task. The source code and pretrained models are available at http://www.github.com/ufal/perin.},
address = {Online},
Expand Down
8 changes: 8 additions & 0 deletions hanlp/pretrained/amr.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,13 @@
provided as inputs.
'''

MRP2020_AMR_ZHO_MENGZI_BASE = 'http://download.hanlp.com/amr/extra/amr-zho-mengzi-base_20220415_101941.zip'
'''A Chinese Permutation-invariant Semantic Parser (:cite:`samuel-straka-2020-ufal`) trained on the MRP2020
Chinese AMR corpus using Mengzi BERT base (:cite:`zhang2021mengzi`). Its performance on the dev set is
``{amr-zho [tops F1: 85.43%][anchors F1: 93.41%][labels F1: 87.68%][properties F1: 82.02%][edges F1: 73.17%]
[attributes F1: 0.00%][all F1: 84.11%]}``. Test set performance is unknown since the test set is not released to the
public.
'''

# Will be filled up during runtime
ALL = {}
2 changes: 1 addition & 1 deletion hanlp/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Author: hankcs
# Date: 2019-12-28 19:26

__version__ = '2.1.0-beta.23'
__version__ = '2.1.0-beta.24'
"""HanLP version"""


Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
'amr': [
'penman==1.2.1',
'networkx>=2.5.1',
'perin-parser>=0.0.10',
'perin-parser>=0.0.12',
],
'tf': [
'fasttext-wheel==0.9.2',
Expand Down

0 comments on commit 3be983d

Please sign in to comment.